aboutsummaryrefslogtreecommitdiff
path: root/arch/tile
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile')
-rw-r--r--arch/tile/Kconfig183
-rw-r--r--arch/tile/Kconfig.debug21
-rw-r--r--arch/tile/Makefile18
-rw-r--r--arch/tile/configs/tilegx_defconfig243
-rw-r--r--arch/tile/configs/tilepro_defconfig87
-rw-r--r--arch/tile/gxio/Kconfig33
-rw-r--r--arch/tile/gxio/Makefile10
-rw-r--r--arch/tile/gxio/dma_queue.c176
-rw-r--r--arch/tile/gxio/iorpc_globals.c89
-rw-r--r--arch/tile/gxio/iorpc_mpipe.c593
-rw-r--r--arch/tile/gxio/iorpc_mpipe_info.c102
-rw-r--r--arch/tile/gxio/iorpc_trio.c350
-rw-r--r--arch/tile/gxio/iorpc_uart.c77
-rw-r--r--arch/tile/gxio/iorpc_usb_host.c99
-rw-r--r--arch/tile/gxio/kiorpc.c61
-rw-r--r--arch/tile/gxio/mpipe.c578
-rw-r--r--arch/tile/gxio/trio.c49
-rw-r--r--arch/tile/gxio/uart.c87
-rw-r--r--arch/tile/gxio/usb_host.c91
-rw-r--r--arch/tile/include/arch/Kbuild18
-rw-r--r--arch/tile/include/arch/chip_tile64.h258
-rw-r--r--arch/tile/include/arch/interrupts_32.h307
-rw-r--r--arch/tile/include/arch/interrupts_64.h276
-rw-r--r--arch/tile/include/arch/mpipe.h371
-rw-r--r--arch/tile/include/arch/mpipe_constants.h42
-rw-r--r--arch/tile/include/arch/mpipe_def.h39
-rw-r--r--arch/tile/include/arch/mpipe_shm.h521
-rw-r--r--arch/tile/include/arch/mpipe_shm_def.h23
-rw-r--r--arch/tile/include/arch/spr_def.h16
-rw-r--r--arch/tile/include/arch/trio.h111
-rw-r--r--arch/tile/include/arch/trio_constants.h36
-rw-r--r--arch/tile/include/arch/trio_def.h41
-rw-r--r--arch/tile/include/arch/trio_pcie_intfc.h229
-rw-r--r--arch/tile/include/arch/trio_pcie_intfc_def.h32
-rw-r--r--arch/tile/include/arch/trio_pcie_rc.h156
-rw-r--r--arch/tile/include/arch/trio_pcie_rc_def.h24
-rw-r--r--arch/tile/include/arch/trio_shm.h125
-rw-r--r--arch/tile/include/arch/trio_shm_def.h19
-rw-r--r--arch/tile/include/arch/uart.h300
-rw-r--r--arch/tile/include/arch/uart_def.h120
-rw-r--r--arch/tile/include/arch/usb_host.h26
-rw-r--r--arch/tile/include/arch/usb_host_def.h19
-rw-r--r--arch/tile/include/asm/Kbuild17
-rw-r--r--arch/tile/include/asm/atomic.h126
-rw-r--r--arch/tile/include/asm/atomic_32.h150
-rw-r--r--arch/tile/include/asm/atomic_64.h49
-rw-r--r--arch/tile/include/asm/barrier.h92
-rw-r--r--arch/tile/include/asm/bitops.h48
-rw-r--r--arch/tile/include/asm/bitops_32.h11
-rw-r--r--arch/tile/include/asm/bitops_64.h21
-rw-r--r--arch/tile/include/asm/byteorder.h1
-rw-r--r--arch/tile/include/asm/cache.h25
-rw-r--r--arch/tile/include/asm/cacheflush.h55
-rw-r--r--arch/tile/include/asm/checksum.h18
-rw-r--r--arch/tile/include/asm/cmpxchg.h134
-rw-r--r--arch/tile/include/asm/compat.h106
-rw-r--r--arch/tile/include/asm/device.h36
-rw-r--r--arch/tile/include/asm/dma-mapping.h160
-rw-r--r--arch/tile/include/asm/elf.h22
-rw-r--r--arch/tile/include/asm/fixmap.h55
-rw-r--r--arch/tile/include/asm/ftrace.h22
-rw-r--r--arch/tile/include/asm/futex.h144
-rw-r--r--arch/tile/include/asm/hardirq.h2
-rw-r--r--arch/tile/include/asm/hardwall.h41
-rw-r--r--arch/tile/include/asm/highmem.h2
-rw-r--r--arch/tile/include/asm/homecache.h30
-rw-r--r--arch/tile/include/asm/hugetlb.h26
-rw-r--r--arch/tile/include/asm/io.h282
-rw-r--r--arch/tile/include/asm/irq.h6
-rw-r--r--arch/tile/include/asm/irqflags.h89
-rw-r--r--arch/tile/include/asm/kdebug.h28
-rw-r--r--arch/tile/include/asm/kexec.h12
-rw-r--r--arch/tile/include/asm/kgdb.h71
-rw-r--r--arch/tile/include/asm/kmap_types.h31
-rw-r--r--arch/tile/include/asm/kprobes.h79
-rw-r--r--arch/tile/include/asm/memprof.h33
-rw-r--r--arch/tile/include/asm/mmu.h3
-rw-r--r--arch/tile/include/asm/mmu_context.h10
-rw-r--r--arch/tile/include/asm/mmzone.h2
-rw-r--r--arch/tile/include/asm/module.h40
-rw-r--r--arch/tile/include/asm/page.h83
-rw-r--r--arch/tile/include/asm/pci.h167
-rw-r--r--arch/tile/include/asm/percpu.h34
-rw-r--r--arch/tile/include/asm/perf_event.h22
-rw-r--r--arch/tile/include/asm/pgalloc.h92
-rw-r--r--arch/tile/include/asm/pgtable.h115
-rw-r--r--arch/tile/include/asm/pgtable_32.h56
-rw-r--r--arch/tile/include/asm/pgtable_64.h74
-rw-r--r--arch/tile/include/asm/pmc.h64
-rw-r--r--arch/tile/include/asm/processor.h108
-rw-r--r--arch/tile/include/asm/ptrace.h85
-rw-r--r--arch/tile/include/asm/sections.h10
-rw-r--r--arch/tile/include/asm/setup.h30
-rw-r--r--arch/tile/include/asm/signal.h12
-rw-r--r--arch/tile/include/asm/smp.h9
-rw-r--r--arch/tile/include/asm/spinlock_32.h1
-rw-r--r--arch/tile/include/asm/spinlock_64.h6
-rw-r--r--arch/tile/include/asm/stack.h1
-rw-r--r--arch/tile/include/asm/string.h2
-rw-r--r--arch/tile/include/asm/switch_to.h79
-rw-r--r--arch/tile/include/asm/syscall.h6
-rw-r--r--arch/tile/include/asm/syscalls.h20
-rw-r--r--arch/tile/include/asm/system.h261
-rw-r--r--arch/tile/include/asm/thread_info.h61
-rw-r--r--arch/tile/include/asm/timex.h2
-rw-r--r--arch/tile/include/asm/tlbflush.h17
-rw-r--r--arch/tile/include/asm/topology.h63
-rw-r--r--arch/tile/include/asm/traps.h19
-rw-r--r--arch/tile/include/asm/uaccess.h268
-rw-r--r--arch/tile/include/asm/unaligned.h29
-rw-r--r--arch/tile/include/asm/unistd.h31
-rw-r--r--arch/tile/include/asm/vdso.h49
-rw-r--r--arch/tile/include/gxio/common.h40
-rw-r--r--arch/tile/include/gxio/dma_queue.h161
-rw-r--r--arch/tile/include/gxio/iorpc_globals.h38
-rw-r--r--arch/tile/include/gxio/iorpc_mpipe.h144
-rw-r--r--arch/tile/include/gxio/iorpc_mpipe_info.h50
-rw-r--r--arch/tile/include/gxio/iorpc_trio.h104
-rw-r--r--arch/tile/include/gxio/iorpc_uart.h40
-rw-r--r--arch/tile/include/gxio/iorpc_usb_host.h46
-rw-r--r--arch/tile/include/gxio/kiorpc.h29
-rw-r--r--arch/tile/include/gxio/mpipe.h1871
-rw-r--r--arch/tile/include/gxio/trio.h298
-rw-r--r--arch/tile/include/gxio/uart.h105
-rw-r--r--arch/tile/include/gxio/usb_host.h87
-rw-r--r--arch/tile/include/hv/drv_mpipe_intf.h605
-rw-r--r--arch/tile/include/hv/drv_trio_intf.h199
-rw-r--r--arch/tile/include/hv/drv_uart_intf.h33
-rw-r--r--arch/tile/include/hv/drv_usb_host_intf.h39
-rw-r--r--arch/tile/include/hv/drv_xgbe_intf.h2
-rw-r--r--arch/tile/include/hv/hypervisor.h377
-rw-r--r--arch/tile/include/hv/iorpc.h714
-rw-r--r--arch/tile/include/uapi/arch/Kbuild17
-rw-r--r--arch/tile/include/uapi/arch/abi.h (renamed from arch/tile/include/arch/abi.h)0
-rw-r--r--arch/tile/include/uapi/arch/chip.h (renamed from arch/tile/include/arch/chip.h)4
-rw-r--r--arch/tile/include/uapi/arch/chip_tilegx.h (renamed from arch/tile/include/arch/chip_tilegx.h)0
-rw-r--r--arch/tile/include/uapi/arch/chip_tilepro.h (renamed from arch/tile/include/arch/chip_tilepro.h)0
-rw-r--r--arch/tile/include/uapi/arch/icache.h (renamed from arch/tile/include/arch/icache.h)0
-rw-r--r--arch/tile/include/uapi/arch/interrupts.h (renamed from arch/tile/include/arch/interrupts.h)0
-rw-r--r--arch/tile/include/uapi/arch/interrupts_32.h309
-rw-r--r--arch/tile/include/uapi/arch/interrupts_64.h278
-rw-r--r--arch/tile/include/uapi/arch/opcode.h (renamed from arch/tile/include/arch/opcode.h)0
-rw-r--r--arch/tile/include/uapi/arch/opcode_tilegx.h (renamed from arch/tile/include/arch/opcode_tilegx.h)1
-rw-r--r--arch/tile/include/uapi/arch/opcode_tilepro.h (renamed from arch/tile/include/arch/opcode_tilepro.h)1
-rw-r--r--arch/tile/include/uapi/arch/sim.h (renamed from arch/tile/include/arch/sim.h)0
-rw-r--r--arch/tile/include/uapi/arch/sim_def.h (renamed from arch/tile/include/arch/sim_def.h)0
-rw-r--r--arch/tile/include/uapi/arch/spr_def.h26
-rw-r--r--arch/tile/include/uapi/arch/spr_def_32.h (renamed from arch/tile/include/arch/spr_def_32.h)64
-rw-r--r--arch/tile/include/uapi/arch/spr_def_64.h (renamed from arch/tile/include/arch/spr_def_64.h)49
-rw-r--r--arch/tile/include/uapi/asm/Kbuild21
-rw-r--r--arch/tile/include/uapi/asm/auxvec.h (renamed from arch/tile/include/asm/auxvec.h)3
-rw-r--r--arch/tile/include/uapi/asm/bitsperlong.h (renamed from arch/tile/include/asm/bitsperlong.h)0
-rw-r--r--arch/tile/include/uapi/asm/byteorder.h21
-rw-r--r--arch/tile/include/uapi/asm/cachectl.h42
-rw-r--r--arch/tile/include/uapi/asm/hardwall.h51
-rw-r--r--arch/tile/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/tile/include/uapi/asm/mman.h (renamed from arch/tile/include/asm/mman.h)0
-rw-r--r--arch/tile/include/uapi/asm/ptrace.h94
-rw-r--r--arch/tile/include/uapi/asm/setup.h (renamed from arch/tile/include/asm/hw_irq.h)9
-rw-r--r--arch/tile/include/uapi/asm/sigcontext.h (renamed from arch/tile/include/asm/sigcontext.h)0
-rw-r--r--arch/tile/include/uapi/asm/siginfo.h (renamed from arch/tile/include/asm/siginfo.h)0
-rw-r--r--arch/tile/include/uapi/asm/signal.h27
-rw-r--r--arch/tile/include/uapi/asm/stat.h (renamed from arch/tile/include/asm/stat.h)0
-rw-r--r--arch/tile/include/uapi/asm/swab.h (renamed from arch/tile/include/asm/swab.h)0
-rw-r--r--arch/tile/include/uapi/asm/unistd.h36
-rw-r--r--arch/tile/kernel/Makefile26
-rw-r--r--arch/tile/kernel/asm-offsets.c52
-rw-r--r--arch/tile/kernel/backtrace.c9
-rw-r--r--arch/tile/kernel/compat.c99
-rw-r--r--arch/tile/kernel/compat_signal.c202
-rw-r--r--arch/tile/kernel/early_printk.c69
-rw-r--r--arch/tile/kernel/entry.S46
-rw-r--r--arch/tile/kernel/ftrace.c244
-rw-r--r--arch/tile/kernel/futex_64.S55
-rw-r--r--arch/tile/kernel/hardwall.c774
-rw-r--r--arch/tile/kernel/head_32.S27
-rw-r--r--arch/tile/kernel/head_64.S78
-rw-r--r--arch/tile/kernel/hvglue.S74
-rw-r--r--arch/tile/kernel/hvglue.lds58
-rw-r--r--arch/tile/kernel/hvglue_trace.c266
-rw-r--r--arch/tile/kernel/init_task.c59
-rw-r--r--arch/tile/kernel/intvec_32.S245
-rw-r--r--arch/tile/kernel/intvec_64.S556
-rw-r--r--arch/tile/kernel/irq.c56
-rw-r--r--arch/tile/kernel/kgdb.c499
-rw-r--r--arch/tile/kernel/kprobes.c528
-rw-r--r--arch/tile/kernel/machine_kexec.c42
-rw-r--r--arch/tile/kernel/mcount_64.S224
-rw-r--r--arch/tile/kernel/messaging.c6
-rw-r--r--arch/tile/kernel/module.c26
-rw-r--r--arch/tile/kernel/pci-dma.c580
-rw-r--r--arch/tile/kernel/pci.c110
-rw-r--r--arch/tile/kernel/pci_gx.c1610
-rw-r--r--arch/tile/kernel/perf_event.c1005
-rw-r--r--arch/tile/kernel/pmc.c121
-rw-r--r--arch/tile/kernel/proc.c10
-rw-r--r--arch/tile/kernel/process.c404
-rw-r--r--arch/tile/kernel/ptrace.c194
-rw-r--r--arch/tile/kernel/reboot.c4
-rw-r--r--arch/tile/kernel/regs_32.S6
-rw-r--r--arch/tile/kernel/regs_64.S6
-rw-r--r--arch/tile/kernel/relocate_kernel_32.S (renamed from arch/tile/kernel/relocate_kernel.S)27
-rw-r--r--arch/tile/kernel/relocate_kernel_64.S263
-rw-r--r--arch/tile/kernel/setup.c464
-rw-r--r--arch/tile/kernel/signal.c80
-rw-r--r--arch/tile/kernel/single_step.c162
-rw-r--r--arch/tile/kernel/smp.c51
-rw-r--r--arch/tile/kernel/smpboot.c32
-rw-r--r--arch/tile/kernel/stack.c284
-rw-r--r--arch/tile/kernel/sys.c20
-rw-r--r--arch/tile/kernel/sysfs.c84
-rw-r--r--arch/tile/kernel/time.c55
-rw-r--r--arch/tile/kernel/tlb.c19
-rw-r--r--arch/tile/kernel/traps.c136
-rw-r--r--arch/tile/kernel/unaligned.c1598
-rw-r--r--arch/tile/kernel/usb.c69
-rw-r--r--arch/tile/kernel/vdso.c212
-rw-r--r--arch/tile/kernel/vdso/Makefile118
-rw-r--r--arch/tile/kernel/vdso/vdso.S28
-rw-r--r--arch/tile/kernel/vdso/vdso.lds.S87
-rw-r--r--arch/tile/kernel/vdso/vdso32.S28
-rw-r--r--arch/tile/kernel/vdso/vgettimeofday.c107
-rw-r--r--arch/tile/kernel/vdso/vrt_sigreturn.S30
-rw-r--r--arch/tile/kernel/vmlinux.lds.S35
-rw-r--r--arch/tile/kvm/Kconfig2
-rw-r--r--arch/tile/lib/Makefile15
-rw-r--r--arch/tile/lib/atomic_32.c180
-rw-r--r--arch/tile/lib/atomic_asm_32.S1
-rw-r--r--arch/tile/lib/cacheflush.c48
-rw-r--r--arch/tile/lib/checksum.c15
-rw-r--r--arch/tile/lib/cpumask.c2
-rw-r--r--arch/tile/lib/exports.c19
-rw-r--r--arch/tile/lib/memchr_64.c10
-rw-r--r--arch/tile/lib/memcpy_32.S63
-rw-r--r--arch/tile/lib/memcpy_64.c287
-rw-r--r--arch/tile/lib/memcpy_tile64.c276
-rw-r--r--arch/tile/lib/memcpy_user_64.c10
-rw-r--r--arch/tile/lib/memset_32.c110
-rw-r--r--arch/tile/lib/memset_64.c9
-rw-r--r--arch/tile/lib/spinlock_32.c2
-rw-r--r--arch/tile/lib/spinlock_common.h2
-rw-r--r--arch/tile/lib/strchr_32.c2
-rw-r--r--arch/tile/lib/strchr_64.c17
-rw-r--r--arch/tile/lib/string-endian.h44
-rw-r--r--arch/tile/lib/strlen_32.c2
-rw-r--r--arch/tile/lib/strlen_64.c11
-rw-r--r--arch/tile/lib/strnlen_32.c47
-rw-r--r--arch/tile/lib/strnlen_64.c48
-rw-r--r--arch/tile/lib/uaccess.c8
-rw-r--r--arch/tile/lib/usercopy_32.S112
-rw-r--r--arch/tile/lib/usercopy_64.S85
-rw-r--r--arch/tile/mm/elf.c114
-rw-r--r--arch/tile/mm/fault.c217
-rw-r--r--arch/tile/mm/highmem.c8
-rw-r--r--arch/tile/mm/homecache.c196
-rw-r--r--arch/tile/mm/hugetlbpage.c396
-rw-r--r--arch/tile/mm/init.c256
-rw-r--r--arch/tile/mm/migrate.h6
-rw-r--r--arch/tile/mm/migrate_32.S40
-rw-r--r--arch/tile/mm/migrate_64.S38
-rw-r--r--arch/tile/mm/mmap.c26
-rw-r--r--arch/tile/mm/pgtable.c180
262 files changed, 24507 insertions, 6803 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 11270ca22c0..4f3006b600e 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,16 +3,30 @@
config TILE
def_bool y
+ select HAVE_PERF_EVENTS
+ select USE_PMC if PERF_EVENTS
+ select HAVE_DMA_ATTRS
+ select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
select GENERIC_FIND_FIRST_BIT
- select USE_GENERIC_SMP_HELPERS
+ select SYSCTL_EXCEPTION_TRACE
select CC_OPTIMIZE_FOR_SIZE
- select HAVE_GENERIC_HARDIRQS
+ select HAVE_DEBUG_KMEMLEAK
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
+ select HAVE_DEBUG_BUGVERBOSE
+ select VIRT_TO_BUS
select SYS_HYPERVISOR
- select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select GENERIC_CLOCKEVENTS
+ select MODULES_USE_ELF_RELA
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_SYSCALL_TRACEPOINTS
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select HAVE_DEBUG_STACKOVERFLOW
+ select ARCH_WANT_FRAME_POINTERS
# FIXME: investigate whether we need/want these options.
# select HAVE_IOREMAP_PROT
@@ -31,9 +45,6 @@ config MMU
config GENERIC_CSUM
def_bool y
-config SEMAPHORE_SLEEPERS
- def_bool y
-
config HAVE_ARCH_ALLOC_REMAP
def_bool y
@@ -46,19 +57,25 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
config SYS_SUPPORTS_HUGETLBFS
def_bool y
-config GENERIC_CLOCKEVENTS
+# Support for additional huge page sizes besides HPAGE_SIZE.
+# The software support is currently only present in the TILE-Gx
+# hypervisor. TILEPro in any case does not support page sizes
+# larger than the default HPAGE_SIZE.
+config HUGETLB_SUPER_PAGES
+ depends on HUGETLB_PAGE && TILEGX
+ def_bool y
+
+config GENERIC_TIME_VSYSCALL
def_bool y
+# Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled.
+config USE_PMC
+ bool
+
# FIXME: tilegx can implement a more efficient rwsem.
config RWSEM_GENERIC_SPINLOCK
def_bool y
-# We have a very flat architecture from a migration point of view,
-# so save boot time by presetting this (particularly useful on tile-sim).
-config DEFAULT_MIGRATION_COST
- int
- default "10000000"
-
# We only support gcc 4.4 and above, so this should work.
config ARCH_SUPPORTS_OPTIMIZED_INLINING
def_bool y
@@ -69,6 +86,12 @@ config ARCH_PHYS_ADDR_T_64BIT
config ARCH_DMA_ADDR_T_64BIT
def_bool y
+config NEED_DMA_MAP_STATE
+ def_bool y
+
+config ARCH_HAS_DMA_SET_COHERENT_MASK
+ bool
+
config LOCKDEP_SUPPORT
def_bool y
@@ -94,35 +117,40 @@ config STRICT_DEVMEM
config SMP
def_bool y
-# Allow checking for compile-time determined overflow errors in
-# copy_from_user(). There are still unprovable places in the
-# generic code as of 2.6.34, so this option is not really compatible
-# with -Werror, which is more useful in general.
-config DEBUG_COPY_FROM_USER
- def_bool n
-
config HVC_TILE
+ depends on TTY
select HVC_DRIVER
+ select HVC_IRQ if TILEGX
def_bool y
-# Please note: TILE-Gx support is not yet finalized; this is
-# the preliminary support. TILE-Gx drivers are only provided
-# with the alpha or beta test versions for Tilera customers.
config TILEGX
- depends on EXPERIMENTAL
- bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+ bool "Building for TILE-Gx (64-bit) processor"
+ select SPARSE_IRQ
+ select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
+ select HAVE_ARCH_KGDB
+
+config TILEPRO
+ def_bool !TILEGX
config 64BIT
- depends on TILEGX
- def_bool y
+ def_bool TILEGX
config ARCH_DEFCONFIG
string
- default "arch/tile/configs/tile_defconfig" if !TILEGX
+ default "arch/tile/configs/tilepro_defconfig" if !TILEGX
default "arch/tile/configs/tilegx_defconfig" if TILEGX
source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
menu "Tilera-specific configuration"
config NR_CPUS
@@ -135,7 +163,30 @@ config NR_CPUS
smaller kernel memory footprint results from using a smaller
value on chips with fewer tiles.
-source "kernel/time/Kconfig"
+if TILEGX
+
+choice
+ prompt "Kernel page size"
+ default PAGE_SIZE_64KB
+ help
+ This lets you select the page size of the kernel. For best
+ performance on memory-intensive applications, a page size of 64KB
+ is recommended. For workloads involving many small files, many
+ connections, etc., it may be better to select 16KB, which uses
+ memory more efficiently at some cost in TLB performance.
+
+ Note that this option is TILE-Gx specific; currently
+ TILEPro page size is set by rebuilding the hypervisor.
+
+config PAGE_SIZE_16KB
+ bool "16KB"
+
+config PAGE_SIZE_64KB
+ bool "64KB"
+
+endchoice
+
+endif
source "kernel/Kconfig.hz"
@@ -162,7 +213,7 @@ config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
-# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+# We do not currently support disabling HIGHMEM on tilepro.
config HIGHMEM
bool # "Support for more than 512 MB of RAM"
default !TILEGX
@@ -181,6 +232,22 @@ config HIGHMEM
If unsure, say "true".
+config ZONE_DMA
+ def_bool y
+
+config IOMMU_HELPER
+ bool
+
+config NEED_SG_DMA_LENGTH
+ bool
+
+config SWIOTLB
+ bool
+ default TILEGX
+ select IOMMU_HELPER
+ select NEED_SG_DMA_LENGTH
+ select ARCH_HAS_DMA_SET_COHERENT_MASK
+
# We do not currently support disabling NUMA.
config NUMA
bool # "NUMA Memory Allocation and Scheduler Support"
@@ -240,6 +307,7 @@ endchoice
config PAGE_OFFSET
hex
+ depends on !64BIT
default 0xF0000000 if VMSPLIT_3_75G
default 0xE0000000 if VMSPLIT_3_5G
default 0xB0000000 if VMSPLIT_2_75G
@@ -251,6 +319,8 @@ config PAGE_OFFSET
source "mm/Kconfig"
+source "kernel/Kconfig.preempt"
+
config CMDLINE_BOOL
bool "Built-in kernel command line"
default n
@@ -298,7 +368,7 @@ config CMDLINE_OVERRIDE
config VMALLOC_RESERVE
hex
- default 0x1000000
+ default 0x2000000
config HARDWALL
bool "Hardwall support to allow access to user dynamic network"
@@ -307,11 +377,19 @@ config HARDWALL
config KERNEL_PL
int "Processor protection level for kernel"
range 1 2
- default "1"
+ default 2 if TILEGX
+ default 1 if !TILEGX
---help---
- This setting determines the processor protection level the
- kernel will be built to run at. Generally you should use
- the default value here.
+ Since MDE 4.2, the Tilera hypervisor runs the kernel
+ at PL2 by default. If running under an older hypervisor,
+ or as a KVM guest, you must run at PL1. (The current
+ hypervisor may also be recompiled with "make HV_PL=2" to
+ allow it to run a kernel at PL1, but clients running at PL1
+ are not expected to be supported indefinitely.)
+
+ If you're not sure, don't change the default.
+
+source "arch/tile/gxio/Kconfig"
endmenu # Tilera-specific configuration
@@ -322,6 +400,8 @@ config PCI
default y
select PCI_DOMAINS
select GENERIC_PCI_IOMAP
+ select TILE_GXIO_TRIO if TILEGX
+ select PCI_MSI if TILEGX
---help---
Enable PCI root complex support, so PCIe endpoint devices can
be attached to the Tile chip. Many, but not all, PCI devices
@@ -333,18 +413,32 @@ config PCI_DOMAINS
config NO_IOMEM
def_bool !PCI
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool !PCI
+config TILE_PCI_IO
+ bool "PCI I/O space support"
+ default n
+ depends on PCI
+ depends on TILEGX
+ ---help---
+ Enable PCI I/O space support on TILEGx. Since the PCI I/O space
+ is used by few modern PCIe endpoint devices, its support is disabled
+ by default to save the TRIO PIO Region resource for other purposes.
+
source "drivers/pci/Kconfig"
-config HOTPLUG
- bool "Support for hot-pluggable devices"
+source "drivers/pci/pcie/Kconfig"
+
+config TILE_USB
+ tristate "Tilera USB host adapter support"
+ default y
+ depends on USB
+ depends on TILEGX
+ select TILE_GXIO_USB_HOST
---help---
- Say Y here if you want to plug devices into your computer while
- the system is running, and be able to use them quickly. In many
- cases, the devices can likewise be unplugged at any time too.
- One well-known example of this is USB.
+ Provides USB host adapter support for the built-in EHCI and OHCI
+ interfaces on TILE-Gx chips.
source "drivers/pci/hotplug/Kconfig"
@@ -352,11 +446,6 @@ endmenu
menu "Executable file formats"
-# only elf supported
-config KCORE_ELF
- def_bool y
- depends on PROC_FS
-
source "fs/Kconfig.binfmt"
endmenu
diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug
index ddbfc3322d7..19734d3ab1e 100644
--- a/arch/tile/Kconfig.debug
+++ b/arch/tile/Kconfig.debug
@@ -14,21 +14,12 @@ config EARLY_PRINTK
with klogd/syslogd. You should normally N here,
unless you want to debug such a crash.
-config DEBUG_STACKOVERFLOW
- bool "Check for stack overflows"
- depends on DEBUG_KERNEL
+config TILE_HVGLUE_TRACE
+ bool "Provide wrapper functions for hypervisor ABI calls"
+ default n
help
- This option will cause messages to be printed if free stack space
- drops below a certain limit.
-
-config DEBUG_EXTRA_FLAGS
- string "Additional compiler arguments when building with '-g'"
- depends on DEBUG_INFO
- default ""
- help
- Debug info can be large, and flags like
- `-femit-struct-debug-baseonly' can reduce the kernel file
- size and build time noticeably. Such flags are often
- helpful if the main use of debug info is line number info.
+ Provide wrapper functions for the hypervisor ABI calls
+ defined in arch/tile/kernel/hvglue.S. This allows tracing
+ mechanisms, etc., to have visibility into those calls.
endmenu
diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 17acce70569..4dc380a519d 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -26,14 +26,20 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH))
endif
endif
-ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"")
-KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS)
-endif
+# The tile compiler may emit .eh_frame information for backtracing.
+# In kernel modules, this causes load failures due to unsupported relocations.
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+LIBGCC_PATH := \
+ $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
# Provide the path to use for "make defconfig".
-KBUILD_DEFCONFIG := $(ARCH)_defconfig
+# We default to the newer TILE-Gx architecture if only "tile" is given.
+ifeq ($(ARCH),tile)
+ KBUILD_DEFCONFIG := tilegx_defconfig
+else
+ KBUILD_DEFCONFIG := $(ARCH)_defconfig
+endif
# Used as a file extension when useful, e.g. head_$(BITS).o
# Not needed for (e.g.) "$(CC) -m32" since the compiler automatically
@@ -53,7 +59,7 @@ libs-y += $(LIBGCC_PATH)
# See arch/tile/Kbuild for content of core part of the kernel
core-y += arch/tile/
-core-$(CONFIG_KVM) += arch/tile/kvm/
+core-$(CONFIG_TILE_GXIO) += arch/tile/gxio/
ifdef TILERA_ROOT
INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index b8d99aca543..730e40d9cf6 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -1,16 +1,15 @@
CONFIG_TILEGX=y
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_FHANDLE=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
@@ -18,18 +17,18 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEM_RES_CTLR=y
-CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_XZ=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
+CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -45,12 +44,12 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_NR_CPUS=100
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TILE_PCI_IO=y
CONFIG_PCI_DEBUG=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
@@ -108,151 +107,9 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_PIMSM_V2=y
CONFIG_NETLABEL=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_ZONES=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_DCCP=m
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NETFILTER_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
-CONFIG_NETFILTER_XT_TARGET_TEE=m
-CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_IPVS=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_OSF=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_IPV6=y
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-CONFIG_IP_VS_PROTO_SCTP=y
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-CONFIG_NF_CONNTRACK_IPV4=m
-# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=y
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=y
-CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_IP6=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_ULOG=m
-CONFIG_BRIDGE_EBT_NFLOG=m
CONFIG_RDS=m
CONFIG_RDS_TCP=m
CONFIG_BRIDGE=m
-CONFIG_NET_DSA=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_PHONET=m
@@ -293,13 +150,13 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_CLS_IND=y
CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
@@ -318,10 +175,12 @@ CONFIG_BLK_DEV_SD=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SAS_ATA=y
+CONFIG_ISCSI_TCP=m
CONFIG_SCSI_MVSAS=y
# CONFIG_SCSI_MVSAS_DEBUG is not set
CONFIG_SCSI_MVSAS_TASKLET=y
CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
CONFIG_SATA_SIL24=y
# CONFIG_ATA_SFF is not set
CONFIG_MD=y
@@ -331,7 +190,6 @@ CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID456=m
-CONFIG_MULTICORE_RAID456=y
CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_DEBUG=y
@@ -345,6 +203,12 @@ CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
+CONFIG_TARGET_CORE=m
+CONFIG_TCM_IBLOCK=m
+CONFIG_TCM_FILEIO=m
+CONFIG_TCM_PSCSI=m
+CONFIG_LOOPBACK_TARGET=m
+CONFIG_ISCSI_TARGET=m
CONFIG_FUSION=y
CONFIG_FUSION_SAS=y
CONFIG_NETDEVICES=y
@@ -361,42 +225,8 @@ CONFIG_VETH=m
CONFIG_NET_DSA_MV88E6060=y
CONFIG_NET_DSA_MV88E6131=y
CONFIG_NET_DSA_MV88E6123_61_65=y
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-# CONFIG_NET_VENDOR_AMD is not set
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_TILE_NET is not set
-# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_SKY2=y
+CONFIG_PTP_1588_CLOCK_TILEGX=y
# CONFIG_WLAN is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -404,6 +234,7 @@ CONFIG_NET_DSA_MV88E6123_61_65=y
# CONFIG_SERIO is not set
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_TILEGX=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_TIMERIOMEM=m
CONFIG_I2C=y
@@ -412,13 +243,16 @@ CONFIG_I2C_CHARDEV=y
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
# CONFIG_VGA_ARB is not set
-# CONFIG_HID_SUPPORT is not set
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_VIA=m
+CONFIG_DRM_SAVAGE=m
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
@@ -466,9 +300,8 @@ CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFS_V4_1=y
CONFIG_NFS_FSCACHE=y
CONFIG_NFSD=m
@@ -521,25 +354,28 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
CONFIG_DLM_DEBUG=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_REDUCED=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_DYNAMIC_DEBUG=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
-CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_KGDB=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
@@ -548,7 +384,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
@@ -561,14 +396,12 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index 2b1fd31894f..80fc32ed049 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -1,15 +1,14 @@
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_FHANDLE=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
@@ -17,14 +16,13 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEM_RES_CTLR=y
-CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_XZ=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
@@ -44,11 +42,10 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_CFQ_GROUP_IOSCHED=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PCI_DEBUG=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
@@ -122,11 +119,9 @@ CONFIG_NF_CONNTRACK_PPTP=m
CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NETFILTER_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
@@ -190,14 +185,12 @@ CONFIG_IP_VS_SED=m
CONFIG_IP_VS_NQ=m
CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=m
CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
@@ -208,8 +201,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_IP6_NF_MATCH_FRAG=m
@@ -219,7 +210,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
@@ -250,7 +240,6 @@ CONFIG_BRIDGE_EBT_NFLOG=m
CONFIG_RDS=m
CONFIG_RDS_TCP=m
CONFIG_BRIDGE=m
-CONFIG_NET_DSA=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_PHONET=m
@@ -298,6 +287,7 @@ CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_CLS_IND=y
CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
@@ -325,7 +315,6 @@ CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m
CONFIG_MD_RAID456=m
-CONFIG_MULTICORE_RAID456=y
CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_DEBUG=y
@@ -356,40 +345,7 @@ CONFIG_NET_DSA_MV88E6060=y
CONFIG_NET_DSA_MV88E6131=y
CONFIG_NET_DSA_MV88E6123_61_65=y
# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-# CONFIG_NET_VENDOR_AMD is not set
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_E1000E=y
# CONFIG_WLAN is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -405,7 +361,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
# CONFIG_VGA_ARB is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
@@ -450,13 +405,13 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFS_V4_1=y
CONFIG_NFS_FSCACHE=y
CONFIG_NFSD=m
@@ -510,26 +465,29 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
CONFIG_DLM_DEBUG=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
-CONFIG_MAGIC_SYSRQ=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_REDUCED=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_DYNAMIC_DEBUG=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
-CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
@@ -538,7 +496,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
@@ -551,14 +508,12 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig
new file mode 100644
index 00000000000..d4e10d58071
--- /dev/null
+++ b/arch/tile/gxio/Kconfig
@@ -0,0 +1,33 @@
+# Support direct access to TILE-Gx hardware from user space, via the
+# gxio library, or from kernel space, via kernel IORPC support.
+config TILE_GXIO
+ bool
+ depends on TILEGX
+
+# Support direct access to the common I/O DMA facility within the
+# TILE-Gx mPIPE and Trio hardware from kernel space.
+config TILE_GXIO_DMA
+ bool
+ select TILE_GXIO
+
+# Support direct access to the TILE-Gx mPIPE hardware from kernel space.
+config TILE_GXIO_MPIPE
+ bool
+ select TILE_GXIO
+ select TILE_GXIO_DMA
+
+# Support direct access to the TILE-Gx TRIO hardware from kernel space.
+config TILE_GXIO_TRIO
+ bool
+ select TILE_GXIO
+ select TILE_GXIO_DMA
+
+# Support direct access to the TILE-Gx USB hardware from kernel space.
+config TILE_GXIO_USB_HOST
+ bool
+ select TILE_GXIO
+
+# Support direct access to the TILE-Gx UART hardware from kernel space.
+config TILE_GXIO_UART
+ bool
+ select TILE_GXIO
diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile
new file mode 100644
index 00000000000..26ae2c72746
--- /dev/null
+++ b/arch/tile/gxio/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the Tile-Gx device access support.
+#
+
+obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o
+obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o
+obj-$(CONFIG_TILE_GXIO_MPIPE) += mpipe.o iorpc_mpipe.o iorpc_mpipe_info.o
+obj-$(CONFIG_TILE_GXIO_TRIO) += trio.o iorpc_trio.o
+obj-$(CONFIG_TILE_GXIO_UART) += uart.o iorpc_uart.o
+obj-$(CONFIG_TILE_GXIO_USB_HOST) += usb_host.o iorpc_usb_host.o
diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c
new file mode 100644
index 00000000000..baa60357f8b
--- /dev/null
+++ b/arch/tile/gxio/dma_queue.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/io.h>
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <gxio/dma_queue.h>
+
+/* Wait for a memory read to complete. */
+#define wait_for_value(val) \
+ __asm__ __volatile__("move %0, %0" :: "r"(val))
+
+/* The index is in the low 16. */
+#define DMA_QUEUE_INDEX_MASK ((1 << 16) - 1)
+
+/*
+ * The hardware descriptor-ring type.
+ * This matches the types used by mpipe (MPIPE_EDMA_POST_REGION_VAL_t)
+ * and trio (TRIO_PUSH_DMA_REGION_VAL_t or TRIO_PULL_DMA_REGION_VAL_t).
+ * See those types for more documentation on the individual fields.
+ */
+typedef union {
+ struct {
+#ifndef __BIG_ENDIAN__
+ uint64_t ring_idx:16;
+ uint64_t count:16;
+ uint64_t gen:1;
+ uint64_t __reserved:31;
+#else
+ uint64_t __reserved:31;
+ uint64_t gen:1;
+ uint64_t count:16;
+ uint64_t ring_idx:16;
+#endif
+ };
+ uint64_t word;
+} __gxio_ring_t;
+
+void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue,
+ void *post_region_addr, unsigned int num_entries)
+{
+ /*
+ * Limit 65536 entry rings to 65535 credits because we only have a
+ * 16 bit completion counter.
+ */
+ int64_t credits = (num_entries < 65536) ? num_entries : 65535;
+
+ memset(dma_queue, 0, sizeof(*dma_queue));
+
+ dma_queue->post_region_addr = post_region_addr;
+ dma_queue->hw_complete_count = 0;
+ dma_queue->credits_and_next_index = credits << DMA_QUEUE_CREDIT_SHIFT;
+}
+
+EXPORT_SYMBOL_GPL(__gxio_dma_queue_init);
+
+void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue)
+{
+ __gxio_ring_t val;
+ uint64_t count;
+ uint64_t delta;
+ uint64_t new_count;
+
+ /*
+ * Read the 64-bit completion count without touching the cache, so
+ * we later avoid having to evict any sharers of this cache line
+ * when we update it below.
+ */
+ uint64_t orig_hw_complete_count =
+ cmpxchg(&dma_queue->hw_complete_count,
+ -1, -1);
+
+ /* Make sure the load completes before we access the hardware. */
+ wait_for_value(orig_hw_complete_count);
+
+ /* Read the 16-bit count of how many packets it has completed. */
+ val.word = __gxio_mmio_read(dma_queue->post_region_addr);
+ count = val.count;
+
+ /*
+ * Calculate the number of completions since we last updated the
+ * 64-bit counter. It's safe to ignore the high bits because the
+ * maximum credit value is 65535.
+ */
+ delta = (count - orig_hw_complete_count) & 0xffff;
+ if (delta == 0)
+ return;
+
+ /*
+ * Try to write back the count, advanced by delta. If we race with
+ * another thread, this might fail, in which case we return
+ * immediately on the assumption that some credits are (or at least
+ * were) available.
+ */
+ new_count = orig_hw_complete_count + delta;
+ if (cmpxchg(&dma_queue->hw_complete_count,
+ orig_hw_complete_count,
+ new_count) != orig_hw_complete_count)
+ return;
+
+ /*
+ * We succeeded in advancing the completion count; add back the
+ * corresponding number of egress credits.
+ */
+ __insn_fetchadd(&dma_queue->credits_and_next_index,
+ (delta << DMA_QUEUE_CREDIT_SHIFT));
+}
+
+EXPORT_SYMBOL_GPL(__gxio_dma_queue_update_credits);
+
+/*
+ * A separate 'blocked' method for put() so that backtraces and
+ * profiles will clearly indicate that we're wasting time spinning on
+ * egress availability rather than actually posting commands.
+ */
+int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue,
+ int64_t modifier)
+{
+ int backoff = 16;
+ int64_t old;
+
+ do {
+ int i;
+ /* Back off to avoid spamming memory networks. */
+ for (i = backoff; i > 0; i--)
+ __insn_mfspr(SPR_PASS);
+
+ /* Check credits again. */
+ __gxio_dma_queue_update_credits(dma_queue);
+ old = __insn_fetchaddgez(&dma_queue->credits_and_next_index,
+ modifier);
+
+ /* Calculate bounded exponential backoff for next iteration. */
+ if (backoff < 256)
+ backoff *= 2;
+ } while (old + modifier < 0);
+
+ return old;
+}
+
+EXPORT_SYMBOL_GPL(__gxio_dma_queue_wait_for_credits);
+
+int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue,
+ unsigned int num, int wait)
+{
+ return __gxio_dma_queue_reserve(dma_queue, num, wait != 0, true);
+}
+
+EXPORT_SYMBOL_GPL(__gxio_dma_queue_reserve_aux);
+
+int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue,
+ int64_t completion_slot, int update)
+{
+ if (update) {
+ if (ACCESS_ONCE(dma_queue->hw_complete_count) >
+ completion_slot)
+ return 1;
+
+ __gxio_dma_queue_update_credits(dma_queue);
+ }
+
+ return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot;
+}
+
+EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete);
diff --git a/arch/tile/gxio/iorpc_globals.c b/arch/tile/gxio/iorpc_globals.c
new file mode 100644
index 00000000000..e178e90805a
--- /dev/null
+++ b/arch/tile/gxio/iorpc_globals.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_globals.h"
+
+struct arm_pollfd_param {
+ union iorpc_pollfd pollfd;
+};
+
+int __iorpc_arm_pollfd(int fd, int pollfd_cookie)
+{
+ struct arm_pollfd_param temp;
+ struct arm_pollfd_param *params = &temp;
+
+ params->pollfd.kernel.cookie = pollfd_cookie;
+
+ return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ IORPC_OP_ARM_POLLFD);
+}
+
+EXPORT_SYMBOL(__iorpc_arm_pollfd);
+
+struct close_pollfd_param {
+ union iorpc_pollfd pollfd;
+};
+
+int __iorpc_close_pollfd(int fd, int pollfd_cookie)
+{
+ struct close_pollfd_param temp;
+ struct close_pollfd_param *params = &temp;
+
+ params->pollfd.kernel.cookie = pollfd_cookie;
+
+ return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ IORPC_OP_CLOSE_POLLFD);
+}
+
+EXPORT_SYMBOL(__iorpc_close_pollfd);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int __iorpc_get_mmio_base(int fd, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ IORPC_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(__iorpc_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ IORPC_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(__iorpc_check_mmio_offset);
diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c
new file mode 100644
index 00000000000..e19325c4c43
--- /dev/null
+++ b/arch/tile/gxio/iorpc_mpipe.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_mpipe.h"
+
+struct alloc_buffer_stacks_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_buffer_stacks_param temp;
+ struct alloc_buffer_stacks_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_alloc_buffer_stacks);
+
+struct init_buffer_stack_aux_param {
+ union iorpc_mem_buffer buffer;
+ unsigned int stack;
+ unsigned int buffer_size_enum;
+};
+
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
+ void *mem_va, size_t mem_size,
+ unsigned int mem_flags, unsigned int stack,
+ unsigned int buffer_size_enum)
+{
+ int __result;
+ unsigned long long __cpa;
+ pte_t __pte;
+ struct init_buffer_stack_aux_param temp;
+ struct init_buffer_stack_aux_param *params = &temp;
+
+ __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte);
+ if (__result != 0)
+ return __result;
+ params->buffer.kernel.cpa = __cpa;
+ params->buffer.kernel.size = mem_size;
+ params->buffer.kernel.pte = __pte;
+ params->buffer.kernel.flags = mem_flags;
+ params->stack = stack;
+ params->buffer_size_enum = buffer_size_enum;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_init_buffer_stack_aux);
+
+
+struct alloc_notif_rings_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_notif_rings_param temp;
+ struct alloc_notif_rings_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_ALLOC_NOTIF_RINGS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_alloc_notif_rings);
+
+struct init_notif_ring_aux_param {
+ union iorpc_mem_buffer buffer;
+ unsigned int ring;
+};
+
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
+ size_t mem_size, unsigned int mem_flags,
+ unsigned int ring)
+{
+ int __result;
+ unsigned long long __cpa;
+ pte_t __pte;
+ struct init_notif_ring_aux_param temp;
+ struct init_notif_ring_aux_param *params = &temp;
+
+ __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte);
+ if (__result != 0)
+ return __result;
+ params->buffer.kernel.cpa = __cpa;
+ params->buffer.kernel.size = mem_size;
+ params->buffer.kernel.pte = __pte;
+ params->buffer.kernel.flags = mem_flags;
+ params->ring = ring;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_INIT_NOTIF_RING_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_init_notif_ring_aux);
+
+struct request_notif_ring_interrupt_param {
+ union iorpc_interrupt interrupt;
+ unsigned int ring;
+};
+
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
+ int inter_x, int inter_y,
+ int inter_ipi, int inter_event,
+ unsigned int ring)
+{
+ struct request_notif_ring_interrupt_param temp;
+ struct request_notif_ring_interrupt_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+ params->ring = ring;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_REQUEST_NOTIF_RING_INTERRUPT);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_request_notif_ring_interrupt);
+
+struct enable_notif_ring_interrupt_param {
+ unsigned int ring;
+};
+
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
+ unsigned int ring)
+{
+ struct enable_notif_ring_interrupt_param temp;
+ struct enable_notif_ring_interrupt_param *params = &temp;
+
+ params->ring = ring;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_ENABLE_NOTIF_RING_INTERRUPT);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_enable_notif_ring_interrupt);
+
+struct alloc_notif_groups_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_notif_groups_param temp;
+ struct alloc_notif_groups_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_ALLOC_NOTIF_GROUPS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_alloc_notif_groups);
+
+struct init_notif_group_param {
+ unsigned int group;
+ gxio_mpipe_notif_group_bits_t bits;
+};
+
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
+ unsigned int group,
+ gxio_mpipe_notif_group_bits_t bits)
+{
+ struct init_notif_group_param temp;
+ struct init_notif_group_param *params = &temp;
+
+ params->group = group;
+ params->bits = bits;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_INIT_NOTIF_GROUP);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_init_notif_group);
+
+struct alloc_buckets_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
+ unsigned int first, unsigned int flags)
+{
+ struct alloc_buckets_param temp;
+ struct alloc_buckets_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_ALLOC_BUCKETS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_alloc_buckets);
+
+struct init_bucket_param {
+ unsigned int bucket;
+ MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info;
+};
+
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
+ MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info)
+{
+ struct init_bucket_param temp;
+ struct init_bucket_param *params = &temp;
+
+ params->bucket = bucket;
+ params->bucket_info = bucket_info;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_INIT_BUCKET);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_init_bucket);
+
+struct alloc_edma_rings_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_edma_rings_param temp;
+ struct alloc_edma_rings_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_ALLOC_EDMA_RINGS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_alloc_edma_rings);
+
+struct init_edma_ring_aux_param {
+ union iorpc_mem_buffer buffer;
+ unsigned int ring;
+ unsigned int channel;
+};
+
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
+ size_t mem_size, unsigned int mem_flags,
+ unsigned int ring, unsigned int channel)
+{
+ int __result;
+ unsigned long long __cpa;
+ pte_t __pte;
+ struct init_edma_ring_aux_param temp;
+ struct init_edma_ring_aux_param *params = &temp;
+
+ __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte);
+ if (__result != 0)
+ return __result;
+ params->buffer.kernel.cpa = __cpa;
+ params->buffer.kernel.size = mem_size;
+ params->buffer.kernel.pte = __pte;
+ params->buffer.kernel.flags = mem_flags;
+ params->ring = ring;
+ params->channel = channel;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_INIT_EDMA_RING_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux);
+
+
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
+ size_t blob_size)
+{
+ const void *params = blob;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, blob_size,
+ GXIO_MPIPE_OP_COMMIT_RULES);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_commit_rules);
+
+struct register_client_memory_param {
+ unsigned int iotlb;
+ HV_PTE pte;
+ unsigned int flags;
+};
+
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
+ unsigned int iotlb, HV_PTE pte,
+ unsigned int flags)
+{
+ struct register_client_memory_param temp;
+ struct register_client_memory_param *params = &temp;
+
+ params->iotlb = iotlb;
+ params->pte = pte;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_register_client_memory);
+
+struct link_open_aux_param {
+ _gxio_mpipe_link_name_t name;
+ unsigned int flags;
+};
+
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
+ _gxio_mpipe_link_name_t name, unsigned int flags)
+{
+ struct link_open_aux_param temp;
+ struct link_open_aux_param *params = &temp;
+
+ params->name = name;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_LINK_OPEN_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_link_open_aux);
+
+struct link_close_aux_param {
+ int mac;
+};
+
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac)
+{
+ struct link_close_aux_param temp;
+ struct link_close_aux_param *params = &temp;
+
+ params->mac = mac;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_LINK_CLOSE_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_link_close_aux);
+
+struct link_set_attr_aux_param {
+ int mac;
+ uint32_t attr;
+ int64_t val;
+};
+
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
+ uint32_t attr, int64_t val)
+{
+ struct link_set_attr_aux_param temp;
+ struct link_set_attr_aux_param *params = &temp;
+
+ params->mac = mac;
+ params->attr = attr;
+ params->val = val;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_LINK_SET_ATTR_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_link_set_attr_aux);
+
+struct get_timestamp_aux_param {
+ uint64_t sec;
+ uint64_t nsec;
+ uint64_t cycles;
+};
+
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+ uint64_t *nsec, uint64_t *cycles)
+{
+ int __result;
+ struct get_timestamp_aux_param temp;
+ struct get_timestamp_aux_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_MPIPE_OP_GET_TIMESTAMP_AUX);
+ *sec = params->sec;
+ *nsec = params->nsec;
+ *cycles = params->cycles;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_mpipe_get_timestamp_aux);
+
+struct set_timestamp_aux_param {
+ uint64_t sec;
+ uint64_t nsec;
+ uint64_t cycles;
+};
+
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
+ uint64_t nsec, uint64_t cycles)
+{
+ struct set_timestamp_aux_param temp;
+ struct set_timestamp_aux_param *params = &temp;
+
+ params->sec = sec;
+ params->nsec = nsec;
+ params->cycles = cycles;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_SET_TIMESTAMP_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_set_timestamp_aux);
+
+struct adjust_timestamp_aux_param {
+ int64_t nsec;
+};
+
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec)
+{
+ struct adjust_timestamp_aux_param temp;
+ struct adjust_timestamp_aux_param *params = &temp;
+
+ params->nsec = nsec;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux);
+
+struct config_edma_ring_blks_param {
+ unsigned int ering;
+ unsigned int max_blks;
+ unsigned int min_snf_blks;
+ unsigned int db;
+};
+
+int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context,
+ unsigned int ering, unsigned int max_blks,
+ unsigned int min_snf_blks, unsigned int db)
+{
+ struct config_edma_ring_blks_param temp;
+ struct config_edma_ring_blks_param *params = &temp;
+
+ params->ering = ering;
+ params->max_blks = max_blks;
+ params->min_snf_blks = min_snf_blks;
+ params->db = db;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks);
+
+struct adjust_timestamp_freq_param {
+ int32_t ppb;
+};
+
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb)
+{
+ struct adjust_timestamp_freq_param temp;
+ struct adjust_timestamp_freq_param *params = &temp;
+
+ params->ppb = ppb;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq);
+
+struct arm_pollfd_param {
+ union iorpc_pollfd pollfd;
+};
+
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
+{
+ struct arm_pollfd_param temp;
+ struct arm_pollfd_param *params = &temp;
+
+ params->pollfd.kernel.cookie = pollfd_cookie;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_ARM_POLLFD);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_arm_pollfd);
+
+struct close_pollfd_param {
+ union iorpc_pollfd pollfd;
+};
+
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
+{
+ struct close_pollfd_param temp;
+ struct close_pollfd_param *params = &temp;
+
+ params->pollfd.kernel.cookie = pollfd_cookie;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_CLOSE_POLLFD);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_close_pollfd);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_MPIPE_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_mpipe_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_check_mmio_offset);
diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c
new file mode 100644
index 00000000000..77019c6e9b4
--- /dev/null
+++ b/arch/tile/gxio/iorpc_mpipe_info.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_mpipe_info.h"
+
+struct instance_aux_param {
+ _gxio_mpipe_link_name_t name;
+};
+
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
+ _gxio_mpipe_link_name_t name)
+{
+ struct instance_aux_param temp;
+ struct instance_aux_param *params = &temp;
+
+ params->name = name;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_MPIPE_INFO_OP_INSTANCE_AUX);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_info_instance_aux);
+
+struct enumerate_aux_param {
+ _gxio_mpipe_link_name_t name;
+ _gxio_mpipe_link_mac_t mac;
+};
+
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
+ unsigned int idx,
+ _gxio_mpipe_link_name_t *name,
+ _gxio_mpipe_link_mac_t *mac)
+{
+ int __result;
+ struct enumerate_aux_param temp;
+ struct enumerate_aux_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ (((uint64_t)idx << 32) |
+ GXIO_MPIPE_INFO_OP_ENUMERATE_AUX));
+ *name = params->name;
+ *mac = params->mac;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_mpipe_info_enumerate_aux);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
+ HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_MPIPE_INFO_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_mpipe_info_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_info_check_mmio_offset);
diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c
new file mode 100644
index 00000000000..1d3cedb9aeb
--- /dev/null
+++ b/arch/tile/gxio/iorpc_trio.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_trio.h"
+
+struct alloc_asids_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
+ unsigned int first, unsigned int flags)
+{
+ struct alloc_asids_param temp;
+ struct alloc_asids_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_ALLOC_ASIDS);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_asids);
+
+
+struct alloc_memory_maps_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_memory_maps_param temp;
+ struct alloc_memory_maps_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_ALLOC_MEMORY_MAPS);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_memory_maps);
+
+struct alloc_scatter_queues_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_scatter_queues_param temp;
+ struct alloc_scatter_queues_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_scatter_queues);
+
+struct alloc_pio_regions_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_pio_regions_param temp;
+ struct alloc_pio_regions_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_ALLOC_PIO_REGIONS);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_pio_regions);
+
+struct init_pio_region_aux_param {
+ unsigned int pio_region;
+ unsigned int mac;
+ uint32_t bus_address_hi;
+ unsigned int flags;
+};
+
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
+ unsigned int pio_region, unsigned int mac,
+ uint32_t bus_address_hi, unsigned int flags)
+{
+ struct init_pio_region_aux_param temp;
+ struct init_pio_region_aux_param *params = &temp;
+
+ params->pio_region = pio_region;
+ params->mac = mac;
+ params->bus_address_hi = bus_address_hi;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_INIT_PIO_REGION_AUX);
+}
+
+EXPORT_SYMBOL(gxio_trio_init_pio_region_aux);
+
+
+struct init_memory_map_mmu_aux_param {
+ unsigned int map;
+ unsigned long va;
+ uint64_t size;
+ unsigned int asid;
+ unsigned int mac;
+ uint64_t bus_address;
+ unsigned int node;
+ unsigned int order_mode;
+};
+
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
+ unsigned int map, unsigned long va,
+ uint64_t size, unsigned int asid,
+ unsigned int mac, uint64_t bus_address,
+ unsigned int node,
+ unsigned int order_mode)
+{
+ struct init_memory_map_mmu_aux_param temp;
+ struct init_memory_map_mmu_aux_param *params = &temp;
+
+ params->map = map;
+ params->va = va;
+ params->size = size;
+ params->asid = asid;
+ params->mac = mac;
+ params->bus_address = bus_address;
+ params->node = node;
+ params->order_mode = order_mode;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX);
+}
+
+EXPORT_SYMBOL(gxio_trio_init_memory_map_mmu_aux);
+
+struct get_port_property_param {
+ struct pcie_trio_ports_property trio_ports;
+};
+
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
+ struct pcie_trio_ports_property *trio_ports)
+{
+ int __result;
+ struct get_port_property_param temp;
+ struct get_port_property_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_TRIO_OP_GET_PORT_PROPERTY);
+ *trio_ports = params->trio_ports;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_trio_get_port_property);
+
+struct config_legacy_intr_param {
+ union iorpc_interrupt interrupt;
+ unsigned int mac;
+ unsigned int intx;
+};
+
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event,
+ unsigned int mac, unsigned int intx)
+{
+ struct config_legacy_intr_param temp;
+ struct config_legacy_intr_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+ params->mac = mac;
+ params->intx = intx;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_CONFIG_LEGACY_INTR);
+}
+
+EXPORT_SYMBOL(gxio_trio_config_legacy_intr);
+
+struct config_msi_intr_param {
+ union iorpc_interrupt interrupt;
+ unsigned int mac;
+ unsigned int mem_map;
+ uint64_t mem_map_base;
+ uint64_t mem_map_limit;
+ unsigned int asid;
+};
+
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event,
+ unsigned int mac, unsigned int mem_map,
+ uint64_t mem_map_base, uint64_t mem_map_limit,
+ unsigned int asid)
+{
+ struct config_msi_intr_param temp;
+ struct config_msi_intr_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+ params->mac = mac;
+ params->mem_map = mem_map;
+ params->mem_map_base = mem_map_base;
+ params->mem_map_limit = mem_map_limit;
+ params->asid = asid;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_CONFIG_MSI_INTR);
+}
+
+EXPORT_SYMBOL(gxio_trio_config_msi_intr);
+
+
+struct set_mps_mrs_param {
+ uint16_t mps;
+ uint16_t mrs;
+ unsigned int mac;
+};
+
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
+ uint16_t mrs, unsigned int mac)
+{
+ struct set_mps_mrs_param temp;
+ struct set_mps_mrs_param *params = &temp;
+
+ params->mps = mps;
+ params->mrs = mrs;
+ params->mac = mac;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_SET_MPS_MRS);
+}
+
+EXPORT_SYMBOL(gxio_trio_set_mps_mrs);
+
+struct force_rc_link_up_param {
+ unsigned int mac;
+};
+
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac)
+{
+ struct force_rc_link_up_param temp;
+ struct force_rc_link_up_param *params = &temp;
+
+ params->mac = mac;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_FORCE_RC_LINK_UP);
+}
+
+EXPORT_SYMBOL(gxio_trio_force_rc_link_up);
+
+struct force_ep_link_up_param {
+ unsigned int mac;
+};
+
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac)
+{
+ struct force_ep_link_up_param temp;
+ struct force_ep_link_up_param *params = &temp;
+
+ params->mac = mac;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_FORCE_EP_LINK_UP);
+}
+
+EXPORT_SYMBOL(gxio_trio_force_ep_link_up);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_TRIO_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_trio_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_TRIO_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_trio_check_mmio_offset);
diff --git a/arch/tile/gxio/iorpc_uart.c b/arch/tile/gxio/iorpc_uart.c
new file mode 100644
index 00000000000..b9a6d6193d7
--- /dev/null
+++ b/arch/tile/gxio/iorpc_uart.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_uart.h"
+
+struct cfg_interrupt_param {
+ union iorpc_interrupt interrupt;
+};
+
+int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event)
+{
+ struct cfg_interrupt_param temp;
+ struct cfg_interrupt_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_UART_OP_CFG_INTERRUPT);
+}
+
+EXPORT_SYMBOL(gxio_uart_cfg_interrupt);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_UART_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_uart_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_uart_check_mmio_offset(gxio_uart_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_UART_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_uart_check_mmio_offset);
diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c
new file mode 100644
index 00000000000..9c820073bfc
--- /dev/null
+++ b/arch/tile/gxio/iorpc_usb_host.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_usb_host.h"
+
+struct cfg_interrupt_param {
+ union iorpc_interrupt interrupt;
+};
+
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event)
+{
+ struct cfg_interrupt_param temp;
+ struct cfg_interrupt_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_USB_HOST_OP_CFG_INTERRUPT);
+}
+
+EXPORT_SYMBOL(gxio_usb_host_cfg_interrupt);
+
+struct register_client_memory_param {
+ HV_PTE pte;
+ unsigned int flags;
+};
+
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
+ HV_PTE pte, unsigned int flags)
+{
+ struct register_client_memory_param temp;
+ struct register_client_memory_param *params = &temp;
+
+ params->pte = pte;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_USB_HOST_OP_REGISTER_CLIENT_MEMORY);
+}
+
+EXPORT_SYMBOL(gxio_usb_host_register_client_memory);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_USB_HOST_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_usb_host_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_usb_host_check_mmio_offset);
diff --git a/arch/tile/gxio/kiorpc.c b/arch/tile/gxio/kiorpc.c
new file mode 100644
index 00000000000..c8096aa5a3f
--- /dev/null
+++ b/arch/tile/gxio/kiorpc.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx IORPC support for kernel I/O drivers.
+ */
+
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <gxio/iorpc_globals.h>
+#include <gxio/kiorpc.h>
+
+#ifdef DEBUG_IORPC
+#define TRACE(FMT, ...) pr_info(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)
+#else
+#define TRACE(...)
+#endif
+
+/* Create kernel-VA-space MMIO mapping for an on-chip IO device. */
+void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset,
+ unsigned long size)
+{
+ pgprot_t mmio_base, prot = { 0 };
+ unsigned long pfn;
+ int err;
+
+ /* Look up the shim's lotar and base PA. */
+ err = __iorpc_get_mmio_base(hv_fd, &mmio_base);
+ if (err) {
+ TRACE("get_mmio_base() failure: %d\n", err);
+ return NULL;
+ }
+
+ /* Make sure the HV driver approves of our offset and size. */
+ err = __iorpc_check_mmio_offset(hv_fd, offset, size);
+ if (err) {
+ TRACE("check_mmio_offset() failure: %d\n", err);
+ return NULL;
+ }
+
+ /*
+ * mmio_base contains a base pfn and homing coordinates. Turn
+ * it into an MMIO pgprot and offset pfn.
+ */
+ prot = hv_pte_set_lotar(prot, hv_pte_get_lotar(mmio_base));
+ pfn = pte_pfn(mmio_base) + PFN_DOWN(offset);
+
+ return ioremap_prot(PFN_PHYS(pfn), size, prot);
+}
+
+EXPORT_SYMBOL(iorpc_ioremap);
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
new file mode 100644
index 00000000000..5301a9ffbae
--- /dev/null
+++ b/arch/tile/gxio/mpipe.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Implementation of mpipe gxio calls.
+ */
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <gxio/iorpc_globals.h>
+#include <gxio/iorpc_mpipe.h>
+#include <gxio/iorpc_mpipe_info.h>
+#include <gxio/kiorpc.h>
+#include <gxio/mpipe.h>
+
+/* HACK: Avoid pointless "shadow" warnings. */
+#define link link_shadow
+
+int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index)
+{
+ char file[32];
+
+ int fd;
+ int i;
+
+ if (mpipe_index >= GXIO_MPIPE_INSTANCE_MAX)
+ return -EINVAL;
+
+ snprintf(file, sizeof(file), "mpipe/%d/iorpc", mpipe_index);
+ fd = hv_dev_open((HV_VirtAddr) file, 0);
+
+ context->fd = fd;
+
+ if (fd < 0) {
+ if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX)
+ return fd;
+ else
+ return -ENODEV;
+ }
+
+ /* Map in the MMIO space. */
+ context->mmio_cfg_base = (void __force *)
+ iorpc_ioremap(fd, HV_MPIPE_CONFIG_MMIO_OFFSET,
+ HV_MPIPE_CONFIG_MMIO_SIZE);
+ if (context->mmio_cfg_base == NULL)
+ goto cfg_failed;
+
+ context->mmio_fast_base = (void __force *)
+ iorpc_ioremap(fd, HV_MPIPE_FAST_MMIO_OFFSET,
+ HV_MPIPE_FAST_MMIO_SIZE);
+ if (context->mmio_fast_base == NULL)
+ goto fast_failed;
+
+ /* Initialize the stacks. */
+ for (i = 0; i < 8; i++)
+ context->__stacks.stacks[i] = 255;
+
+ context->instance = mpipe_index;
+
+ return 0;
+
+ fast_failed:
+ iounmap((void __force __iomem *)(context->mmio_cfg_base));
+ cfg_failed:
+ hv_dev_close(context->fd);
+ context->fd = -1;
+ return -ENODEV;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_init);
+
+int gxio_mpipe_destroy(gxio_mpipe_context_t *context)
+{
+ iounmap((void __force __iomem *)(context->mmio_cfg_base));
+ iounmap((void __force __iomem *)(context->mmio_fast_base));
+ return hv_dev_close(context->fd);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_destroy);
+
+static int16_t gxio_mpipe_buffer_sizes[8] =
+ { 128, 256, 512, 1024, 1664, 4096, 10368, 16384 };
+
+gxio_mpipe_buffer_size_enum_t gxio_mpipe_buffer_size_to_buffer_size_enum(size_t
+ size)
+{
+ int i;
+ for (i = 0; i < 7; i++)
+ if (size <= gxio_mpipe_buffer_sizes[i])
+ break;
+ return i;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_buffer_size_to_buffer_size_enum);
+
+size_t gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t
+ buffer_size_enum)
+{
+ if (buffer_size_enum > 7)
+ buffer_size_enum = 7;
+
+ return gxio_mpipe_buffer_sizes[buffer_size_enum];
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_buffer_size_enum_to_buffer_size);
+
+size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers)
+{
+ const int BUFFERS_PER_LINE = 12;
+
+ /* Count the number of cachlines. */
+ unsigned long lines =
+ (buffers + BUFFERS_PER_LINE - 1) / BUFFERS_PER_LINE;
+
+ /* Convert to bytes. */
+ return lines * CHIP_L2_LINE_SIZE();
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_calc_buffer_stack_bytes);
+
+int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context,
+ unsigned int stack,
+ gxio_mpipe_buffer_size_enum_t
+ buffer_size_enum, void *mem, size_t mem_size,
+ unsigned int mem_flags)
+{
+ int result;
+
+ memset(mem, 0, mem_size);
+
+ result = gxio_mpipe_init_buffer_stack_aux(context, mem, mem_size,
+ mem_flags, stack,
+ buffer_size_enum);
+ if (result < 0)
+ return result;
+
+ /* Save the stack. */
+ context->__stacks.stacks[buffer_size_enum] = stack;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_init_buffer_stack);
+
+int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context,
+ unsigned int ring,
+ void *mem, size_t mem_size,
+ unsigned int mem_flags)
+{
+ return gxio_mpipe_init_notif_ring_aux(context, mem, mem_size,
+ mem_flags, ring);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_init_notif_ring);
+
+int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t *context,
+ unsigned int group,
+ unsigned int ring,
+ unsigned int num_rings,
+ unsigned int bucket,
+ unsigned int num_buckets,
+ gxio_mpipe_bucket_mode_t mode)
+{
+ int i;
+ int result;
+
+ gxio_mpipe_bucket_info_t bucket_info = { {
+ .group = group,
+ .mode = mode,
+ }
+ };
+
+ gxio_mpipe_notif_group_bits_t bits = { {0} };
+
+ for (i = 0; i < num_rings; i++)
+ gxio_mpipe_notif_group_add_ring(&bits, ring + i);
+
+ result = gxio_mpipe_init_notif_group(context, group, bits);
+ if (result != 0)
+ return result;
+
+ for (i = 0; i < num_buckets; i++) {
+ bucket_info.notifring = ring + (i % num_rings);
+
+ result = gxio_mpipe_init_bucket(context, bucket + i,
+ bucket_info);
+ if (result != 0)
+ return result;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_init_notif_group_and_buckets);
+
+int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context,
+ unsigned int ring, unsigned int channel,
+ void *mem, size_t mem_size,
+ unsigned int mem_flags)
+{
+ memset(mem, 0, mem_size);
+
+ return gxio_mpipe_init_edma_ring_aux(context, mem, mem_size, mem_flags,
+ ring, channel);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_init_edma_ring);
+
+void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules,
+ gxio_mpipe_context_t *context)
+{
+ rules->context = context;
+ memset(&rules->list, 0, sizeof(rules->list));
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_rules_init);
+
+int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules,
+ unsigned int bucket, unsigned int num_buckets,
+ gxio_mpipe_rules_stacks_t *stacks)
+{
+ int i;
+ int stack = 255;
+
+ gxio_mpipe_rules_list_t *list = &rules->list;
+
+ /* Current rule. */
+ gxio_mpipe_rules_rule_t *rule =
+ (gxio_mpipe_rules_rule_t *) (list->rules + list->head);
+
+ unsigned int head = list->tail;
+
+ /*
+ * Align next rule properly.
+ *Note that "dmacs_and_vlans" will also be aligned.
+ */
+ unsigned int pad = 0;
+ while (((head + pad) % __alignof__(gxio_mpipe_rules_rule_t)) != 0)
+ pad++;
+
+ /*
+ * Verify room.
+ * ISSUE: Mark rules as broken on error?
+ */
+ if (head + pad + sizeof(*rule) >= sizeof(list->rules))
+ return GXIO_MPIPE_ERR_RULES_FULL;
+
+ /* Verify num_buckets is a power of 2. */
+ if (__builtin_popcount(num_buckets) != 1)
+ return GXIO_MPIPE_ERR_RULES_INVALID;
+
+ /* Add padding to previous rule. */
+ rule->size += pad;
+
+ /* Start a new rule. */
+ list->head = head + pad;
+
+ rule = (gxio_mpipe_rules_rule_t *) (list->rules + list->head);
+
+ /* Default some values. */
+ rule->headroom = 2;
+ rule->tailroom = 0;
+ rule->capacity = 16384;
+
+ /* Save the bucket info. */
+ rule->bucket_mask = num_buckets - 1;
+ rule->bucket_first = bucket;
+
+ for (i = 8 - 1; i >= 0; i--) {
+ int maybe =
+ stacks ? stacks->stacks[i] : rules->context->__stacks.
+ stacks[i];
+ if (maybe != 255)
+ stack = maybe;
+ rule->stacks.stacks[i] = stack;
+ }
+
+ if (stack == 255)
+ return GXIO_MPIPE_ERR_RULES_INVALID;
+
+ /* NOTE: Only entries at the end of the array can be 255. */
+ for (i = 8 - 1; i > 0; i--) {
+ if (rule->stacks.stacks[i] == 255) {
+ rule->stacks.stacks[i] = stack;
+ rule->capacity =
+ gxio_mpipe_buffer_size_enum_to_buffer_size(i -
+ 1);
+ }
+ }
+
+ rule->size = sizeof(*rule);
+ list->tail = list->head + rule->size;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_rules_begin);
+
+int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules,
+ unsigned int channel)
+{
+ gxio_mpipe_rules_list_t *list = &rules->list;
+
+ gxio_mpipe_rules_rule_t *rule =
+ (gxio_mpipe_rules_rule_t *) (list->rules + list->head);
+
+ /* Verify channel. */
+ if (channel >= 32)
+ return GXIO_MPIPE_ERR_RULES_INVALID;
+
+ /* Verify begun. */
+ if (list->tail == 0)
+ return GXIO_MPIPE_ERR_RULES_EMPTY;
+
+ rule->channel_bits |= (1UL << channel);
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_rules_add_channel);
+
+int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules, uint8_t headroom)
+{
+ gxio_mpipe_rules_list_t *list = &rules->list;
+
+ gxio_mpipe_rules_rule_t *rule =
+ (gxio_mpipe_rules_rule_t *) (list->rules + list->head);
+
+ /* Verify begun. */
+ if (list->tail == 0)
+ return GXIO_MPIPE_ERR_RULES_EMPTY;
+
+ rule->headroom = headroom;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_rules_set_headroom);
+
+int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules)
+{
+ gxio_mpipe_rules_list_t *list = &rules->list;
+ unsigned int size =
+ offsetof(gxio_mpipe_rules_list_t, rules) + list->tail;
+ return gxio_mpipe_commit_rules(rules->context, list, size);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_rules_commit);
+
+int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_context_t *context,
+ unsigned int ring,
+ void *mem, size_t mem_size, unsigned int mem_flags)
+{
+ /* The init call below will verify that "mem_size" is legal. */
+ unsigned int num_entries = mem_size / sizeof(gxio_mpipe_idesc_t);
+
+ iqueue->context = context;
+ iqueue->idescs = (gxio_mpipe_idesc_t *)mem;
+ iqueue->ring = ring;
+ iqueue->num_entries = num_entries;
+ iqueue->mask_num_entries = num_entries - 1;
+ iqueue->log2_num_entries = __builtin_ctz(num_entries);
+ iqueue->head = 1;
+#ifdef __BIG_ENDIAN__
+ iqueue->swapped = 0;
+#endif
+
+ /* Initialize the "tail". */
+ __gxio_mmio_write(mem, iqueue->head);
+
+ return gxio_mpipe_init_notif_ring(context, ring, mem, mem_size,
+ mem_flags);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_iqueue_init);
+
+int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
+ gxio_mpipe_context_t *context,
+ unsigned int ering,
+ unsigned int channel,
+ void *mem, unsigned int mem_size,
+ unsigned int mem_flags)
+{
+ /* The init call below will verify that "mem_size" is legal. */
+ unsigned int num_entries = mem_size / sizeof(gxio_mpipe_edesc_t);
+
+ /* Offset used to read number of completed commands. */
+ MPIPE_EDMA_POST_REGION_ADDR_t offset;
+
+ int result = gxio_mpipe_init_edma_ring(context, ering, channel,
+ mem, mem_size, mem_flags);
+ if (result < 0)
+ return result;
+
+ memset(equeue, 0, sizeof(*equeue));
+
+ offset.word = 0;
+ offset.region =
+ MPIPE_MMIO_ADDR__REGION_VAL_EDMA -
+ MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
+ offset.ring = ering;
+
+ __gxio_dma_queue_init(&equeue->dma_queue,
+ context->mmio_fast_base + offset.word,
+ num_entries);
+ equeue->edescs = mem;
+ equeue->mask_num_entries = num_entries - 1;
+ equeue->log2_num_entries = __builtin_ctz(num_entries);
+ equeue->context = context;
+ equeue->ering = ering;
+ equeue->channel = channel;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_equeue_init);
+
+int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
+ const struct timespec *ts)
+{
+ cycles_t cycles = get_cycles();
+ return gxio_mpipe_set_timestamp_aux(context, (uint64_t)ts->tv_sec,
+ (uint64_t)ts->tv_nsec,
+ (uint64_t)cycles);
+}
+
+int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
+ struct timespec *ts)
+{
+ int ret;
+ cycles_t cycles_prev, cycles_now, clock_rate;
+ cycles_prev = get_cycles();
+ ret = gxio_mpipe_get_timestamp_aux(context, (uint64_t *)&ts->tv_sec,
+ (uint64_t *)&ts->tv_nsec,
+ (uint64_t *)&cycles_now);
+ if (ret < 0) {
+ return ret;
+ }
+
+ clock_rate = get_clock_rate();
+ ts->tv_nsec -= (cycles_now - cycles_prev) * 1000000000LL / clock_rate;
+ if (ts->tv_nsec < 0) {
+ ts->tv_nsec += 1000000000LL;
+ ts->tv_sec -= 1;
+ }
+ return ret;
+}
+
+int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, int64_t delta)
+{
+ return gxio_mpipe_adjust_timestamp_aux(context, delta);
+}
+
+/* Get our internal context used for link name access. This context is
+ * special in that it is not associated with an mPIPE service domain.
+ */
+static gxio_mpipe_context_t *_gxio_get_link_context(void)
+{
+ static gxio_mpipe_context_t context;
+ static gxio_mpipe_context_t *contextp;
+ static int tried_open = 0;
+ static DEFINE_MUTEX(mutex);
+
+ mutex_lock(&mutex);
+
+ if (!tried_open) {
+ int i = 0;
+ tried_open = 1;
+
+ /*
+ * "4" here is the maximum possible number of mPIPE shims; it's
+ * an exaggeration but we shouldn't ever go beyond 2 anyway.
+ */
+ for (i = 0; i < 4; i++) {
+ char file[80];
+
+ snprintf(file, sizeof(file), "mpipe/%d/iorpc_info", i);
+ context.fd = hv_dev_open((HV_VirtAddr) file, 0);
+ if (context.fd < 0)
+ continue;
+
+ contextp = &context;
+ break;
+ }
+ }
+
+ mutex_unlock(&mutex);
+
+ return contextp;
+}
+
+int gxio_mpipe_link_instance(const char *link_name)
+{
+ _gxio_mpipe_link_name_t name;
+ gxio_mpipe_context_t *context = _gxio_get_link_context();
+
+ if (!context)
+ return GXIO_ERR_NO_DEVICE;
+
+ strncpy(name.name, link_name, sizeof(name.name));
+ name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0';
+
+ return gxio_mpipe_info_instance_aux(context, name);
+}
+
+int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac)
+{
+ int rv;
+ _gxio_mpipe_link_name_t name;
+ _gxio_mpipe_link_mac_t mac;
+
+ gxio_mpipe_context_t *context = _gxio_get_link_context();
+ if (!context)
+ return GXIO_ERR_NO_DEVICE;
+
+ rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac);
+ if (rv >= 0) {
+ strncpy(link_name, name.name, sizeof(name.name));
+ memcpy(link_mac, mac.mac, sizeof(mac.mac));
+ }
+
+ return rv;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_enumerate_mac);
+
+int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
+ gxio_mpipe_context_t *context, const char *link_name,
+ unsigned int flags)
+{
+ _gxio_mpipe_link_name_t name;
+ int rv;
+
+ strncpy(name.name, link_name, sizeof(name.name));
+ name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0';
+
+ rv = gxio_mpipe_link_open_aux(context, name, flags);
+ if (rv < 0)
+ return rv;
+
+ link->context = context;
+ link->channel = rv >> 8;
+ link->mac = rv & 0xFF;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_open);
+
+int gxio_mpipe_link_close(gxio_mpipe_link_t *link)
+{
+ return gxio_mpipe_link_close_aux(link->context, link->mac);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_close);
+
+int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr,
+ int64_t val)
+{
+ return gxio_mpipe_link_set_attr_aux(link->context, link->mac, attr,
+ val);
+}
+
+EXPORT_SYMBOL_GPL(gxio_mpipe_link_set_attr);
diff --git a/arch/tile/gxio/trio.c b/arch/tile/gxio/trio.c
new file mode 100644
index 00000000000..69f0b8df3ce
--- /dev/null
+++ b/arch/tile/gxio/trio.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Implementation of trio gxio calls.
+ */
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include <gxio/trio.h>
+#include <gxio/iorpc_globals.h>
+#include <gxio/iorpc_trio.h>
+#include <gxio/kiorpc.h>
+
+int gxio_trio_init(gxio_trio_context_t *context, unsigned int trio_index)
+{
+ char file[32];
+ int fd;
+
+ snprintf(file, sizeof(file), "trio/%d/iorpc", trio_index);
+ fd = hv_dev_open((HV_VirtAddr) file, 0);
+ if (fd < 0) {
+ context->fd = -1;
+
+ if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX)
+ return fd;
+ else
+ return -ENODEV;
+ }
+
+ context->fd = fd;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_trio_init);
diff --git a/arch/tile/gxio/uart.c b/arch/tile/gxio/uart.c
new file mode 100644
index 00000000000..ba585175ef8
--- /dev/null
+++ b/arch/tile/gxio/uart.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Implementation of UART gxio calls.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <gxio/uart.h>
+#include <gxio/iorpc_globals.h>
+#include <gxio/iorpc_uart.h>
+#include <gxio/kiorpc.h>
+
+int gxio_uart_init(gxio_uart_context_t *context, int uart_index)
+{
+ char file[32];
+ int fd;
+
+ snprintf(file, sizeof(file), "uart/%d/iorpc", uart_index);
+ fd = hv_dev_open((HV_VirtAddr) file, 0);
+ if (fd < 0) {
+ if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX)
+ return fd;
+ else
+ return -ENODEV;
+ }
+
+ context->fd = fd;
+
+ /* Map in the MMIO space. */
+ context->mmio_base = (void __force *)
+ iorpc_ioremap(fd, HV_UART_MMIO_OFFSET, HV_UART_MMIO_SIZE);
+
+ if (context->mmio_base == NULL) {
+ hv_dev_close(context->fd);
+ context->fd = -1;
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_init);
+
+int gxio_uart_destroy(gxio_uart_context_t *context)
+{
+ iounmap((void __force __iomem *)(context->mmio_base));
+ hv_dev_close(context->fd);
+
+ context->mmio_base = NULL;
+ context->fd = -1;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_destroy);
+
+/* UART register write wrapper. */
+void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset,
+ uint64_t word)
+{
+ __gxio_mmio_write(context->mmio_base + offset, word);
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_write);
+
+/* UART register read wrapper. */
+uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset)
+{
+ return __gxio_mmio_read(context->mmio_base + offset);
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_read);
diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c
new file mode 100644
index 00000000000..785afad7922
--- /dev/null
+++ b/arch/tile/gxio/usb_host.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ *
+ * Implementation of USB gxio calls.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <gxio/iorpc_globals.h>
+#include <gxio/iorpc_usb_host.h>
+#include <gxio/kiorpc.h>
+#include <gxio/usb_host.h>
+
+int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
+ int is_ehci)
+{
+ char file[32];
+ int fd;
+
+ if (is_ehci)
+ snprintf(file, sizeof(file), "usb_host/%d/iorpc/ehci",
+ usb_index);
+ else
+ snprintf(file, sizeof(file), "usb_host/%d/iorpc/ohci",
+ usb_index);
+
+ fd = hv_dev_open((HV_VirtAddr) file, 0);
+ if (fd < 0) {
+ if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX)
+ return fd;
+ else
+ return -ENODEV;
+ }
+
+ context->fd = fd;
+
+ // Map in the MMIO space.
+ context->mmio_base =
+ (void __force *)iorpc_ioremap(fd, 0, HV_USB_HOST_MMIO_SIZE);
+
+ if (context->mmio_base == NULL) {
+ hv_dev_close(context->fd);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_usb_host_init);
+
+int gxio_usb_host_destroy(gxio_usb_host_context_t *context)
+{
+ iounmap((void __force __iomem *)(context->mmio_base));
+ hv_dev_close(context->fd);
+
+ context->mmio_base = NULL;
+ context->fd = -1;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_usb_host_destroy);
+
+void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context)
+{
+ return context->mmio_base;
+}
+
+EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start);
+
+size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context)
+{
+ return HV_USB_HOST_MMIO_SIZE;
+}
+
+EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_len);
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild
index 9c0ea24cc94..3751c9fabcf 100644
--- a/arch/tile/include/arch/Kbuild
+++ b/arch/tile/include/arch/Kbuild
@@ -1,17 +1 @@
-header-y += abi.h
-header-y += chip.h
-header-y += chip_tile64.h
-header-y += chip_tilegx.h
-header-y += chip_tilepro.h
-header-y += icache.h
-header-y += interrupts.h
-header-y += interrupts_32.h
-header-y += interrupts_64.h
-header-y += opcode.h
-header-y += opcode_tilegx.h
-header-y += opcode_tilepro.h
-header-y += sim.h
-header-y += sim_def.h
-header-y += spr_def.h
-header-y += spr_def_32.h
-header-y += spr_def_64.h
+# Tile arch headers
diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h
deleted file mode 100644
index 261aaba092d..00000000000
--- a/arch/tile/include/arch/chip_tile64.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-/*
- * @file
- * Global header file.
- * This header file specifies defines for TILE64.
- */
-
-#ifndef __ARCH_CHIP_H__
-#define __ARCH_CHIP_H__
-
-/** Specify chip version.
- * When possible, prefer the CHIP_xxx symbols below for future-proofing.
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip__ symbol.
- */
-#define TILE_CHIP 0
-
-/** Specify chip revision.
- * This provides for the case of a respin of a particular chip type;
- * the normal value for this symbol is "0".
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip_rev__ symbol.
- */
-#define TILE_CHIP_REV 0
-
-/** The name of this architecture. */
-#define CHIP_ARCH_NAME "tile64"
-
-/** The ELF e_machine type for binaries for this chip. */
-#define CHIP_ELF_TYPE() EM_TILE64
-
-/** The alternate ELF e_machine type for binaries for this chip. */
-#define CHIP_COMPAT_ELF_TYPE() 0x2506
-
-/** What is the native word size of the machine? */
-#define CHIP_WORD_SIZE() 32
-
-/** How many bits of a virtual address are used. Extra bits must be
- * the sign extension of the low bits.
- */
-#define CHIP_VA_WIDTH() 32
-
-/** How many bits are in a physical address? */
-#define CHIP_PA_WIDTH() 36
-
-/** Size of the L2 cache, in bytes. */
-#define CHIP_L2_CACHE_SIZE() 65536
-
-/** Log size of an L2 cache line in bytes. */
-#define CHIP_L2_LOG_LINE_SIZE() 6
-
-/** Size of an L2 cache line, in bytes. */
-#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
-
-/** Associativity of the L2 cache. */
-#define CHIP_L2_ASSOC() 2
-
-/** Size of the L1 data cache, in bytes. */
-#define CHIP_L1D_CACHE_SIZE() 8192
-
-/** Log size of an L1 data cache line in bytes. */
-#define CHIP_L1D_LOG_LINE_SIZE() 4
-
-/** Size of an L1 data cache line, in bytes. */
-#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
-
-/** Associativity of the L1 data cache. */
-#define CHIP_L1D_ASSOC() 2
-
-/** Size of the L1 instruction cache, in bytes. */
-#define CHIP_L1I_CACHE_SIZE() 8192
-
-/** Log size of an L1 instruction cache line in bytes. */
-#define CHIP_L1I_LOG_LINE_SIZE() 6
-
-/** Size of an L1 instruction cache line, in bytes. */
-#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
-
-/** Associativity of the L1 instruction cache. */
-#define CHIP_L1I_ASSOC() 1
-
-/** Stride with which flush instructions must be issued. */
-#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
-
-/** Stride with which inv instructions must be issued. */
-#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Stride with which finv instructions must be issued. */
-#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Can the local cache coherently cache data that is homed elsewhere? */
-#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
-
-/** How many simultaneous outstanding victims can the L2 cache have? */
-#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
-
-/** Does the TLB support the NC and NOALLOC bits? */
-#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
-
-/** Does the chip support hash-for-home caching? */
-#define CHIP_HAS_CBOX_HOME_MAP() 0
-
-/** Number of entries in the chip's home map tables. */
-/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
-
-/** Do uncacheable requests miss in the cache regardless of whether
- * there is matching data? */
-#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
-
-/** Does the mf instruction wait for victims? */
-#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
-
-/** Does the chip have an "inv" instruction that doesn't also flush? */
-#define CHIP_HAS_INV() 0
-
-/** Does the chip have a "wh64" instruction? */
-#define CHIP_HAS_WH64() 0
-
-/** Does this chip have a 'dword_align' instruction? */
-#define CHIP_HAS_DWORD_ALIGN() 0
-
-/** Number of performance counters. */
-#define CHIP_PERFORMANCE_COUNTERS() 2
-
-/** Does this chip have auxiliary performance counters? */
-#define CHIP_HAS_AUX_PERF_COUNTERS() 0
-
-/** Is the CBOX_MSR1 SPR supported? */
-#define CHIP_HAS_CBOX_MSR1() 0
-
-/** Is the TILE_RTF_HWM SPR supported? */
-#define CHIP_HAS_TILE_RTF_HWM() 0
-
-/** Is the TILE_WRITE_PENDING SPR supported? */
-#define CHIP_HAS_TILE_WRITE_PENDING() 0
-
-/** Is the PROC_STATUS SPR supported? */
-#define CHIP_HAS_PROC_STATUS_SPR() 0
-
-/** Is the DSTREAM_PF SPR supported? */
-#define CHIP_HAS_DSTREAM_PF() 0
-
-/** Log of the number of mshims we have. */
-#define CHIP_LOG_NUM_MSHIMS() 2
-
-/** Are the bases of the interrupt vector areas fixed? */
-#define CHIP_HAS_FIXED_INTVEC_BASE() 1
-
-/** Are the interrupt masks split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_INTR_MASK() 1
-
-/** Is the cycle count split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_CYCLE() 1
-
-/** Does the chip have a static network? */
-#define CHIP_HAS_SN() 1
-
-/** Does the chip have a static network processor? */
-#define CHIP_HAS_SN_PROC() 1
-
-/** Size of the L1 static network processor instruction cache, in bytes. */
-#define CHIP_L1SNI_CACHE_SIZE() 2048
-
-/** Does the chip have DMA support in each tile? */
-#define CHIP_HAS_TILE_DMA() 1
-
-/** Does the chip have the second revision of the directly accessible
- * dynamic networks? This encapsulates a number of characteristics,
- * including the absence of the catch-all, the absence of inline message
- * tags, the absence of support for network context-switching, and so on.
- */
-#define CHIP_HAS_REV1_XDN() 0
-
-/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
-#define CHIP_HAS_CMPEXCH() 0
-
-/** Does the chip have memory-mapped I/O support? */
-#define CHIP_HAS_MMIO() 0
-
-/** Does the chip have post-completion interrupts? */
-#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
-
-/** Does the chip have native single step support? */
-#define CHIP_HAS_SINGLE_STEP() 0
-
-#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
-
-/** How many entries are present in the instruction TLB? */
-#define CHIP_ITLB_ENTRIES() 8
-
-/** How many entries are present in the data TLB? */
-#define CHIP_DTLB_ENTRIES() 16
-
-/** How many MAF entries does the XAUI shim have? */
-#define CHIP_XAUI_MAF_ENTRIES() 16
-
-/** Does the memory shim have a source-id table? */
-#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
-
-/** Does the L1 instruction cache clear on reset? */
-#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
-
-/** Does the chip come out of reset with valid coordinates on all tiles?
- * Note that if defined, this also implies that the upper left is 1,1.
- */
-#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
-
-/** Does the chip have unified packet formats? */
-#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
-
-/** Does the chip support write reordering? */
-#define CHIP_HAS_WRITE_REORDERING() 0
-
-/** Does the chip support Y-X routing as well as X-Y? */
-#define CHIP_HAS_Y_X_ROUTING() 0
-
-/** Is INTCTRL_3 managed with the correct MPL? */
-#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
-
-/** Is it possible to configure the chip to be big-endian? */
-#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
-
-/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
-#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
-
-/** Is the DIAG_TRACE_WAY SPR supported? */
-#define CHIP_HAS_DIAG_TRACE_WAY() 0
-
-/** Is the MEM_STRIPE_CONFIG SPR supported? */
-#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
-
-/** Are the TLB_PERF SPRs supported? */
-#define CHIP_HAS_TLB_PERF() 0
-
-/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
-#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
-
-/** Does the chip support rev1 DMA packets? */
-#define CHIP_HAS_REV1_DMA_PACKETS() 0
-
-/** Does the chip have an IPI shim? */
-#define CHIP_HAS_IPI() 0
-
-#endif /* !__OPEN_SOURCE__ */
-#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
deleted file mode 100644
index 96b5710505b..00000000000
--- a/arch/tile/include/arch/interrupts_32.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef __ARCH_INTERRUPTS_H__
-#define __ARCH_INTERRUPTS_H__
-
-/** Mask for an interrupt. */
-/* Note: must handle breaking interrupts into high and low words manually. */
-#define INT_MASK_LO(intno) (1 << (intno))
-#define INT_MASK_HI(intno) (1 << ((intno) - 32))
-
-#ifndef __ASSEMBLER__
-#define INT_MASK(intno) (1ULL << (intno))
-#endif
-
-
-/** Where a given interrupt executes */
-#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
-
-/** Where to store a vector for a given interrupt. */
-#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
-
-/** The base address of user-level interrupts. */
-#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
-
-
-/** Additional synthetic interrupt. */
-#define INT_BREAKPOINT (63)
-
-#define INT_ITLB_MISS 0
-#define INT_MEM_ERROR 1
-#define INT_ILL 2
-#define INT_GPV 3
-#define INT_SN_ACCESS 4
-#define INT_IDN_ACCESS 5
-#define INT_UDN_ACCESS 6
-#define INT_IDN_REFILL 7
-#define INT_UDN_REFILL 8
-#define INT_IDN_COMPLETE 9
-#define INT_UDN_COMPLETE 10
-#define INT_SWINT_3 11
-#define INT_SWINT_2 12
-#define INT_SWINT_1 13
-#define INT_SWINT_0 14
-#define INT_UNALIGN_DATA 15
-#define INT_DTLB_MISS 16
-#define INT_DTLB_ACCESS 17
-#define INT_DMATLB_MISS 18
-#define INT_DMATLB_ACCESS 19
-#define INT_SNITLB_MISS 20
-#define INT_SN_NOTIFY 21
-#define INT_SN_FIREWALL 22
-#define INT_IDN_FIREWALL 23
-#define INT_UDN_FIREWALL 24
-#define INT_TILE_TIMER 25
-#define INT_IDN_TIMER 26
-#define INT_UDN_TIMER 27
-#define INT_DMA_NOTIFY 28
-#define INT_IDN_CA 29
-#define INT_UDN_CA 30
-#define INT_IDN_AVAIL 31
-#define INT_UDN_AVAIL 32
-#define INT_PERF_COUNT 33
-#define INT_INTCTRL_3 34
-#define INT_INTCTRL_2 35
-#define INT_INTCTRL_1 36
-#define INT_INTCTRL_0 37
-#define INT_BOOT_ACCESS 38
-#define INT_WORLD_ACCESS 39
-#define INT_I_ASID 40
-#define INT_D_ASID 41
-#define INT_DMA_ASID 42
-#define INT_SNI_ASID 43
-#define INT_DMA_CPL 44
-#define INT_SN_CPL 45
-#define INT_DOUBLE_FAULT 46
-#define INT_SN_STATIC_ACCESS 47
-#define INT_AUX_PERF_COUNT 48
-
-#define NUM_INTERRUPTS 49
-
-#ifndef __ASSEMBLER__
-#define QUEUED_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_DMATLB_MISS) | \
- INT_MASK(INT_DMATLB_ACCESS) | \
- INT_MASK(INT_SNITLB_MISS) | \
- INT_MASK(INT_SN_NOTIFY) | \
- INT_MASK(INT_SN_FIREWALL) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_DMA_NOTIFY) | \
- INT_MASK(INT_IDN_CA) | \
- INT_MASK(INT_UDN_CA) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DMA_ASID) | \
- INT_MASK(INT_SNI_ASID) | \
- INT_MASK(INT_DMA_CPL) | \
- INT_MASK(INT_SN_CPL) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- 0)
-#define NONQUEUED_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_SN_ACCESS) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_IDN_REFILL) | \
- INT_MASK(INT_UDN_REFILL) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_SN_STATIC_ACCESS) | \
- 0)
-#define CRITICAL_MASKED_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_DMATLB_MISS) | \
- INT_MASK(INT_DMATLB_ACCESS) | \
- INT_MASK(INT_SNITLB_MISS) | \
- INT_MASK(INT_SN_NOTIFY) | \
- INT_MASK(INT_SN_FIREWALL) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_DMA_NOTIFY) | \
- INT_MASK(INT_IDN_CA) | \
- INT_MASK(INT_UDN_CA) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- 0)
-#define CRITICAL_UNMASKED_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_SN_ACCESS) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_IDN_REFILL) | \
- INT_MASK(INT_UDN_REFILL) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DMA_ASID) | \
- INT_MASK(INT_SNI_ASID) | \
- INT_MASK(INT_DMA_CPL) | \
- INT_MASK(INT_SN_CPL) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- INT_MASK(INT_SN_STATIC_ACCESS) | \
- 0)
-#define MASKABLE_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_IDN_REFILL) | \
- INT_MASK(INT_UDN_REFILL) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_DMATLB_MISS) | \
- INT_MASK(INT_DMATLB_ACCESS) | \
- INT_MASK(INT_SNITLB_MISS) | \
- INT_MASK(INT_SN_NOTIFY) | \
- INT_MASK(INT_SN_FIREWALL) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_DMA_NOTIFY) | \
- INT_MASK(INT_IDN_CA) | \
- INT_MASK(INT_UDN_CA) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- 0)
-#define UNMASKABLE_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_SN_ACCESS) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DMA_ASID) | \
- INT_MASK(INT_SNI_ASID) | \
- INT_MASK(INT_DMA_CPL) | \
- INT_MASK(INT_SN_CPL) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- INT_MASK(INT_SN_STATIC_ACCESS) | \
- 0)
-#define SYNC_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_SN_ACCESS) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_IDN_REFILL) | \
- INT_MASK(INT_UDN_REFILL) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_SN_STATIC_ACCESS) | \
- 0)
-#define NON_SYNC_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_DMATLB_MISS) | \
- INT_MASK(INT_DMATLB_ACCESS) | \
- INT_MASK(INT_SNITLB_MISS) | \
- INT_MASK(INT_SN_NOTIFY) | \
- INT_MASK(INT_SN_FIREWALL) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_DMA_NOTIFY) | \
- INT_MASK(INT_IDN_CA) | \
- INT_MASK(INT_UDN_CA) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DMA_ASID) | \
- INT_MASK(INT_SNI_ASID) | \
- INT_MASK(INT_DMA_CPL) | \
- INT_MASK(INT_SN_CPL) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- 0)
-#endif /* !__ASSEMBLER__ */
-#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/interrupts_64.h b/arch/tile/include/arch/interrupts_64.h
deleted file mode 100644
index 5bb58b2e4e6..00000000000
--- a/arch/tile/include/arch/interrupts_64.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef __ARCH_INTERRUPTS_H__
-#define __ARCH_INTERRUPTS_H__
-
-/** Mask for an interrupt. */
-#ifdef __ASSEMBLER__
-/* Note: must handle breaking interrupts into high and low words manually. */
-#define INT_MASK(intno) (1 << (intno))
-#else
-#define INT_MASK(intno) (1ULL << (intno))
-#endif
-
-
-/** Where a given interrupt executes */
-#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
-
-/** Where to store a vector for a given interrupt. */
-#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
-
-/** The base address of user-level interrupts. */
-#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
-
-
-/** Additional synthetic interrupt. */
-#define INT_BREAKPOINT (63)
-
-#define INT_MEM_ERROR 0
-#define INT_SINGLE_STEP_3 1
-#define INT_SINGLE_STEP_2 2
-#define INT_SINGLE_STEP_1 3
-#define INT_SINGLE_STEP_0 4
-#define INT_IDN_COMPLETE 5
-#define INT_UDN_COMPLETE 6
-#define INT_ITLB_MISS 7
-#define INT_ILL 8
-#define INT_GPV 9
-#define INT_IDN_ACCESS 10
-#define INT_UDN_ACCESS 11
-#define INT_SWINT_3 12
-#define INT_SWINT_2 13
-#define INT_SWINT_1 14
-#define INT_SWINT_0 15
-#define INT_ILL_TRANS 16
-#define INT_UNALIGN_DATA 17
-#define INT_DTLB_MISS 18
-#define INT_DTLB_ACCESS 19
-#define INT_IDN_FIREWALL 20
-#define INT_UDN_FIREWALL 21
-#define INT_TILE_TIMER 22
-#define INT_AUX_TILE_TIMER 23
-#define INT_IDN_TIMER 24
-#define INT_UDN_TIMER 25
-#define INT_IDN_AVAIL 26
-#define INT_UDN_AVAIL 27
-#define INT_IPI_3 28
-#define INT_IPI_2 29
-#define INT_IPI_1 30
-#define INT_IPI_0 31
-#define INT_PERF_COUNT 32
-#define INT_AUX_PERF_COUNT 33
-#define INT_INTCTRL_3 34
-#define INT_INTCTRL_2 35
-#define INT_INTCTRL_1 36
-#define INT_INTCTRL_0 37
-#define INT_BOOT_ACCESS 38
-#define INT_WORLD_ACCESS 39
-#define INT_I_ASID 40
-#define INT_D_ASID 41
-#define INT_DOUBLE_FAULT 42
-
-#define NUM_INTERRUPTS 43
-
-#ifndef __ASSEMBLER__
-#define QUEUED_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_AUX_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_IPI_3) | \
- INT_MASK(INT_IPI_2) | \
- INT_MASK(INT_IPI_1) | \
- INT_MASK(INT_IPI_0) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- 0)
-#define NONQUEUED_INTERRUPTS ( \
- INT_MASK(INT_SINGLE_STEP_3) | \
- INT_MASK(INT_SINGLE_STEP_2) | \
- INT_MASK(INT_SINGLE_STEP_1) | \
- INT_MASK(INT_SINGLE_STEP_0) | \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_ILL_TRANS) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- 0)
-#define CRITICAL_MASKED_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_SINGLE_STEP_3) | \
- INT_MASK(INT_SINGLE_STEP_2) | \
- INT_MASK(INT_SINGLE_STEP_1) | \
- INT_MASK(INT_SINGLE_STEP_0) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_AUX_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_IPI_3) | \
- INT_MASK(INT_IPI_2) | \
- INT_MASK(INT_IPI_1) | \
- INT_MASK(INT_IPI_0) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- 0)
-#define CRITICAL_UNMASKED_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_ILL_TRANS) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- 0)
-#define MASKABLE_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_SINGLE_STEP_3) | \
- INT_MASK(INT_SINGLE_STEP_2) | \
- INT_MASK(INT_SINGLE_STEP_1) | \
- INT_MASK(INT_SINGLE_STEP_0) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_AUX_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_IPI_3) | \
- INT_MASK(INT_IPI_2) | \
- INT_MASK(INT_IPI_1) | \
- INT_MASK(INT_IPI_0) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- 0)
-#define UNMASKABLE_INTERRUPTS ( \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_ILL_TRANS) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- 0)
-#define SYNC_INTERRUPTS ( \
- INT_MASK(INT_SINGLE_STEP_3) | \
- INT_MASK(INT_SINGLE_STEP_2) | \
- INT_MASK(INT_SINGLE_STEP_1) | \
- INT_MASK(INT_SINGLE_STEP_0) | \
- INT_MASK(INT_IDN_COMPLETE) | \
- INT_MASK(INT_UDN_COMPLETE) | \
- INT_MASK(INT_ITLB_MISS) | \
- INT_MASK(INT_ILL) | \
- INT_MASK(INT_GPV) | \
- INT_MASK(INT_IDN_ACCESS) | \
- INT_MASK(INT_UDN_ACCESS) | \
- INT_MASK(INT_SWINT_3) | \
- INT_MASK(INT_SWINT_2) | \
- INT_MASK(INT_SWINT_1) | \
- INT_MASK(INT_SWINT_0) | \
- INT_MASK(INT_ILL_TRANS) | \
- INT_MASK(INT_UNALIGN_DATA) | \
- INT_MASK(INT_DTLB_MISS) | \
- INT_MASK(INT_DTLB_ACCESS) | \
- 0)
-#define NON_SYNC_INTERRUPTS ( \
- INT_MASK(INT_MEM_ERROR) | \
- INT_MASK(INT_IDN_FIREWALL) | \
- INT_MASK(INT_UDN_FIREWALL) | \
- INT_MASK(INT_TILE_TIMER) | \
- INT_MASK(INT_AUX_TILE_TIMER) | \
- INT_MASK(INT_IDN_TIMER) | \
- INT_MASK(INT_UDN_TIMER) | \
- INT_MASK(INT_IDN_AVAIL) | \
- INT_MASK(INT_UDN_AVAIL) | \
- INT_MASK(INT_IPI_3) | \
- INT_MASK(INT_IPI_2) | \
- INT_MASK(INT_IPI_1) | \
- INT_MASK(INT_IPI_0) | \
- INT_MASK(INT_PERF_COUNT) | \
- INT_MASK(INT_AUX_PERF_COUNT) | \
- INT_MASK(INT_INTCTRL_3) | \
- INT_MASK(INT_INTCTRL_2) | \
- INT_MASK(INT_INTCTRL_1) | \
- INT_MASK(INT_INTCTRL_0) | \
- INT_MASK(INT_BOOT_ACCESS) | \
- INT_MASK(INT_WORLD_ACCESS) | \
- INT_MASK(INT_I_ASID) | \
- INT_MASK(INT_D_ASID) | \
- INT_MASK(INT_DOUBLE_FAULT) | \
- 0)
-#endif /* !__ASSEMBLER__ */
-#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h
new file mode 100644
index 00000000000..904538e754d
--- /dev/null
+++ b/arch/tile/include/arch/mpipe.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_MPIPE_H__
+#define __ARCH_MPIPE_H__
+
+#include <arch/abi.h>
+#include <arch/mpipe_def.h>
+
+#ifndef __ASSEMBLER__
+
+/*
+ * MMIO Ingress DMA Release Region Address.
+ * This is a description of the physical addresses used to manipulate ingress
+ * credit counters. Accesses to this address space should use an address of
+ * this form and a value like that specified in IDMA_RELEASE_REGION_VAL.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 3;
+ /* NotifRing to be released */
+ uint_reg_t ring : 8;
+ /* Bucket to be released */
+ uint_reg_t bucket : 13;
+ /* Enable NotifRing release */
+ uint_reg_t ring_enable : 1;
+ /* Enable Bucket release */
+ uint_reg_t bucket_enable : 1;
+ /*
+ * This field of the address selects the region (address space) to be
+ * accessed. For the iDMA release region, this field must be 4.
+ */
+ uint_reg_t region : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 6;
+ /* This field of the address indexes the 32 entry service domain table. */
+ uint_reg_t svc_dom : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 24;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 24;
+ uint_reg_t svc_dom : 5;
+ uint_reg_t __reserved_1 : 6;
+ uint_reg_t region : 3;
+ uint_reg_t bucket_enable : 1;
+ uint_reg_t ring_enable : 1;
+ uint_reg_t bucket : 13;
+ uint_reg_t ring : 8;
+ uint_reg_t __reserved_0 : 3;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_IDMA_RELEASE_REGION_ADDR_t;
+
+/*
+ * MMIO Ingress DMA Release Region Value - Release NotifRing and/or Bucket.
+ * Provides release of the associated NotifRing. The address of the MMIO
+ * operation is described in IDMA_RELEASE_REGION_ADDR.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Number of packets being released. The load balancer's count of
+ * inflight packets will be decremented by this amount for the associated
+ * Bucket and/or NotifRing
+ */
+ uint_reg_t count : 16;
+ /* Reserved. */
+ uint_reg_t __reserved : 48;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 48;
+ uint_reg_t count : 16;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_IDMA_RELEASE_REGION_VAL_t;
+
+/*
+ * MMIO Buffer Stack Manager Region Address.
+ * This MMIO region is used for posting or fetching buffers to/from the
+ * buffer stack manager. On an MMIO load, this pops a buffer descriptor from
+ * the top of stack if one is available. On an MMIO store, this pushes a
+ * buffer to the stack. The value read or written is described in
+ * BSM_REGION_VAL.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 3;
+ /* BufferStack being accessed. */
+ uint_reg_t stack : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 18;
+ /*
+ * This field of the address selects the region (address space) to be
+ * accessed. For the buffer stack manager region, this field must be 6.
+ */
+ uint_reg_t region : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 6;
+ /* This field of the address indexes the 32 entry service domain table. */
+ uint_reg_t svc_dom : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_3 : 24;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_3 : 24;
+ uint_reg_t svc_dom : 5;
+ uint_reg_t __reserved_2 : 6;
+ uint_reg_t region : 3;
+ uint_reg_t __reserved_1 : 18;
+ uint_reg_t stack : 5;
+ uint_reg_t __reserved_0 : 3;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_BSM_REGION_ADDR_t;
+
+/*
+ * MMIO Buffer Stack Manager Region Value.
+ * This MMIO region is used for posting or fetching buffers to/from the
+ * buffer stack manager. On an MMIO load, this pops a buffer descriptor from
+ * the top of stack if one is available. On an MMIO store, this pushes a
+ * buffer to the stack. The address of the MMIO operation is described in
+ * BSM_REGION_ADDR.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 7;
+ /*
+ * Base virtual address of the buffer. Must be sign extended by consumer.
+ */
+ int_reg_t va : 35;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 6;
+ /*
+ * Index of the buffer stack to which this buffer belongs. Ignored on
+ * writes since the offset bits specify the stack being accessed.
+ */
+ uint_reg_t stack_idx : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 3;
+ /*
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware. This field is ignored on writes.
+ */
+ uint_reg_t inst : 2;
+ /*
+ * Reads as one to indicate that this is a hardware managed buffer.
+ * Ignored on writes since all buffers on a given stack are the same size.
+ */
+ uint_reg_t hwb : 1;
+ /*
+ * Encoded size of buffer (ignored on writes):
+ * 0 = 128 bytes
+ * 1 = 256 bytes
+ * 2 = 512 bytes
+ * 3 = 1024 bytes
+ * 4 = 1664 bytes
+ * 5 = 4096 bytes
+ * 6 = 10368 bytes
+ * 7 = 16384 bytes
+ */
+ uint_reg_t size : 3;
+ /*
+ * Valid indication for the buffer. Ignored on writes.
+ * 0 : Valid buffer descriptor popped from stack.
+ * 3 : Could not pop a buffer from the stack. Either the stack is empty,
+ * or the hardware's prefetch buffer is empty for this stack.
+ */
+ uint_reg_t c : 2;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t c : 2;
+ uint_reg_t size : 3;
+ uint_reg_t hwb : 1;
+ uint_reg_t inst : 2;
+ uint_reg_t __reserved_2 : 3;
+ uint_reg_t stack_idx : 5;
+ uint_reg_t __reserved_1 : 6;
+ int_reg_t va : 35;
+ uint_reg_t __reserved_0 : 7;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_BSM_REGION_VAL_t;
+
+/*
+ * MMIO Egress DMA Post Region Address.
+ * Used to post descriptor locations to the eDMA descriptor engine. The
+ * value to be written is described in EDMA_POST_REGION_VAL
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 3;
+ /* eDMA ring being accessed */
+ uint_reg_t ring : 6;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 17;
+ /*
+ * This field of the address selects the region (address space) to be
+ * accessed. For the egress DMA post region, this field must be 5.
+ */
+ uint_reg_t region : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 6;
+ /* This field of the address indexes the 32 entry service domain table. */
+ uint_reg_t svc_dom : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_3 : 24;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_3 : 24;
+ uint_reg_t svc_dom : 5;
+ uint_reg_t __reserved_2 : 6;
+ uint_reg_t region : 3;
+ uint_reg_t __reserved_1 : 17;
+ uint_reg_t ring : 6;
+ uint_reg_t __reserved_0 : 3;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_EDMA_POST_REGION_ADDR_t;
+
+/*
+ * MMIO Egress DMA Post Region Value.
+ * Used to post descriptor locations to the eDMA descriptor engine. The
+ * address is described in EDMA_POST_REGION_ADDR.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * For writes, this specifies the current ring tail pointer prior to any
+ * post. For example, to post 1 or more descriptors starting at location
+ * 23, this would contain 23 (not 24). On writes, this index must be
+ * masked based on the ring size. The new tail pointer after this post
+ * is COUNT+RING_IDX (masked by the ring size).
+ *
+ * For reads, this provides the hardware descriptor fetcher's head
+ * pointer. The descriptors prior to the head pointer, however, may not
+ * yet have been processed so this indicator is only used to determine
+ * how full the ring is and if software may post more descriptors.
+ */
+ uint_reg_t ring_idx : 16;
+ /*
+ * For writes, this specifies number of contiguous descriptors that are
+ * being posted. Software may post up to RingSize descriptors with a
+ * single MMIO store. A zero in this field on a write will "wake up" an
+ * eDMA ring and cause it fetch descriptors regardless of the hardware's
+ * current view of the state of the tail pointer.
+ *
+ * For reads, this field provides a rolling count of the number of
+ * descriptors that have been completely processed. This may be used by
+ * software to determine when buffers associated with a descriptor may be
+ * returned or reused. When the ring's flush bit is cleared by software
+ * (after having been set by HW or SW), the COUNT will be cleared.
+ */
+ uint_reg_t count : 16;
+ /*
+ * For writes, this specifies the generation number of the tail being
+ * posted. Note that if tail+cnt wraps to the beginning of the ring, the
+ * eDMA hardware assumes that the descriptors posted at the beginning of
+ * the ring are also valid so it is okay to post around the wrap point.
+ *
+ * For reads, this is the current generation number. Valid descriptors
+ * will have the inverse of this generation number.
+ */
+ uint_reg_t gen : 1;
+ /* Reserved. */
+ uint_reg_t __reserved : 31;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 31;
+ uint_reg_t gen : 1;
+ uint_reg_t count : 16;
+ uint_reg_t ring_idx : 16;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_EDMA_POST_REGION_VAL_t;
+
+/*
+ * Load Balancer Bucket Status Data.
+ * Read/Write data for load balancer Bucket-Status Table. 4160 entries
+ * indexed by LBL_INIT_CTL.IDX when LBL_INIT_CTL.STRUCT_SEL is BSTS_TBL
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* NotifRing currently assigned to this bucket. */
+ uint_reg_t notifring : 8;
+ /* Current reference count. */
+ uint_reg_t count : 16;
+ /* Group associated with this bucket. */
+ uint_reg_t group : 5;
+ /* Mode select for this bucket. */
+ uint_reg_t mode : 3;
+ /* Reserved. */
+ uint_reg_t __reserved : 32;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 32;
+ uint_reg_t mode : 3;
+ uint_reg_t group : 5;
+ uint_reg_t count : 16;
+ uint_reg_t notifring : 8;
+#endif
+ };
+
+ uint_reg_t word;
+} MPIPE_LBL_INIT_DAT_BSTS_TBL_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_MPIPE_H__) */
diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h
new file mode 100644
index 00000000000..84022ac5fe8
--- /dev/null
+++ b/arch/tile/include/arch/mpipe_constants.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+
+#ifndef __ARCH_MPIPE_CONSTANTS_H__
+#define __ARCH_MPIPE_CONSTANTS_H__
+
+#define MPIPE_NUM_CLASSIFIERS 16
+#define MPIPE_CLS_MHZ 1200
+
+#define MPIPE_NUM_EDMA_RINGS 64
+
+#define MPIPE_NUM_SGMII_MACS 16
+#define MPIPE_NUM_XAUI_MACS 16
+#define MPIPE_NUM_LOOPBACK_CHANNELS 4
+#define MPIPE_NUM_NON_LB_CHANNELS 28
+
+#define MPIPE_NUM_IPKT_BLOCKS 1536
+
+#define MPIPE_NUM_BUCKETS 4160
+
+#define MPIPE_NUM_NOTIF_RINGS 256
+
+#define MPIPE_NUM_NOTIF_GROUPS 32
+
+#define MPIPE_NUM_TLBS_PER_ASID 16
+#define MPIPE_TLB_IDX_WIDTH 4
+
+#define MPIPE_MMIO_NUM_SVC_DOM 32
+
+#endif /* __ARCH_MPIPE_CONSTANTS_H__ */
diff --git a/arch/tile/include/arch/mpipe_def.h b/arch/tile/include/arch/mpipe_def.h
new file mode 100644
index 00000000000..c3d30217fc6
--- /dev/null
+++ b/arch/tile/include/arch/mpipe_def.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_MPIPE_DEF_H__
+#define __ARCH_MPIPE_DEF_H__
+#define MPIPE_MMIO_ADDR__REGION_SHIFT 26
+#define MPIPE_MMIO_ADDR__REGION_VAL_CFG 0x0
+#define MPIPE_MMIO_ADDR__REGION_VAL_IDMA 0x4
+#define MPIPE_MMIO_ADDR__REGION_VAL_EDMA 0x5
+#define MPIPE_MMIO_ADDR__REGION_VAL_BSM 0x6
+#define MPIPE_BSM_REGION_VAL__VA_SHIFT 7
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128 0x0
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256 0x1
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512 0x2
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024 0x3
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664 0x4
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096 0x5
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368 0x6
+#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384 0x7
+#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA 0x0
+#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED 0x1
+#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK 0x2
+#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY 0x3
+#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND 0x7
+#define MPIPE_LBL_NR_STATE__FIRST_WORD 0x2138
+#endif /* !defined(__ARCH_MPIPE_DEF_H__) */
diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h
new file mode 100644
index 00000000000..13b3c4300e5
--- /dev/null
+++ b/arch/tile/include/arch/mpipe_shm.h
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+
+#ifndef __ARCH_MPIPE_SHM_H__
+#define __ARCH_MPIPE_SHM_H__
+
+#include <arch/abi.h>
+#include <arch/mpipe_shm_def.h>
+
+#ifndef __ASSEMBLER__
+/**
+ * MPIPE eDMA Descriptor.
+ * The eDMA descriptor is written by software and consumed by hardware. It
+ * is used to specify the location of egress packet data to be sent out of
+ * the chip via one of the packet interfaces.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+ /* Word 0 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Generation number. Used to indicate a valid descriptor in ring. When
+ * a new descriptor is written into the ring, software must toggle this
+ * bit. The net effect is that the GEN bit being written into new
+ * descriptors toggles each time the ring tail pointer wraps.
+ */
+ uint_reg_t gen : 1;
+ /**
+ * For devices with EDMA reorder support, this field allows the
+ * descriptor to select the egress FIFO. The associated DMA ring must
+ * have ALLOW_EFIFO_SEL enabled.
+ */
+ uint_reg_t efifo_sel : 6;
+ /** Reserved. Must be zero. */
+ uint_reg_t r0 : 1;
+ /** Checksum generation enabled for this transfer. */
+ uint_reg_t csum : 1;
+ /**
+ * Nothing to be sent. Used, for example, when software has dropped a
+ * packet but still wishes to return all of the associated buffers.
+ */
+ uint_reg_t ns : 1;
+ /**
+ * Notification interrupt will be delivered when packet has been egressed.
+ */
+ uint_reg_t notif : 1;
+ /**
+ * Boundary indicator. When 1, this transfer includes the EOP for this
+ * command. Must be clear on all but the last descriptor for an egress
+ * packet.
+ */
+ uint_reg_t bound : 1;
+ /** Reserved. Must be zero. */
+ uint_reg_t r1 : 4;
+ /**
+ * Number of bytes to be sent for this descriptor. When zero, no data
+ * will be moved and the buffer descriptor will be ignored. If the
+ * buffer descriptor indicates that it is chained, the low 7 bits of the
+ * VA indicate the offset within the first buffer (e.g. 127 bytes is the
+ * maximum offset into the first buffer). If the size exceeds a single
+ * buffer, subsequent buffer descriptors will be fetched prior to
+ * processing the next eDMA descriptor in the ring.
+ */
+ uint_reg_t xfer_size : 14;
+ /** Reserved. Must be zero. */
+ uint_reg_t r2 : 2;
+ /**
+ * Destination of checksum relative to CSUM_START relative to the first
+ * byte moved by this descriptor. Must be zero if CSUM=0 in this
+ * descriptor. Must be less than XFER_SIZE (e.g. the first byte of the
+ * CSUM_DEST must be within the span of this descriptor).
+ */
+ uint_reg_t csum_dest : 8;
+ /**
+ * Start byte of checksum relative to the first byte moved by this
+ * descriptor. If this is not the first descriptor for the egress
+ * packet, CSUM_START is still relative to the first byte in this
+ * descriptor. Must be zero if CSUM=0 in this descriptor.
+ */
+ uint_reg_t csum_start : 8;
+ /**
+ * Initial value for 16-bit 1's compliment checksum if enabled via CSUM.
+ * Specified in network order. That is, bits[7:0] will be added to the
+ * byte pointed to by CSUM_START and bits[15:8] will be added to the byte
+ * pointed to by CSUM_START+1 (with appropriate 1's compliment carries).
+ * Must be zero if CSUM=0 in this descriptor.
+ */
+ uint_reg_t csum_seed : 16;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t csum_seed : 16;
+ uint_reg_t csum_start : 8;
+ uint_reg_t csum_dest : 8;
+ uint_reg_t r2 : 2;
+ uint_reg_t xfer_size : 14;
+ uint_reg_t r1 : 4;
+ uint_reg_t bound : 1;
+ uint_reg_t notif : 1;
+ uint_reg_t ns : 1;
+ uint_reg_t csum : 1;
+ uint_reg_t r0 : 1;
+ uint_reg_t efifo_sel : 6;
+ uint_reg_t gen : 1;
+#endif
+
+ /* Word 1 */
+
+#ifndef __BIG_ENDIAN__
+ /** Virtual address. Must be sign extended by consumer. */
+ int_reg_t va : 42;
+ /** Reserved. */
+ uint_reg_t __reserved_0 : 6;
+ /** Index of the buffer stack to which this buffer belongs. */
+ uint_reg_t stack_idx : 5;
+ /** Reserved. */
+ uint_reg_t __reserved_1 : 3;
+ /**
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware.
+ */
+ uint_reg_t inst : 2;
+ /**
+ * Always set to one by hardware in iDMA packet descriptors. For eDMA,
+ * indicates whether the buffer will be released to the buffer stack
+ * manager. When 0, software is responsible for releasing the buffer.
+ */
+ uint_reg_t hwb : 1;
+ /**
+ * Encoded size of buffer. Set by the ingress hardware for iDMA packet
+ * descriptors. For eDMA descriptors, indicates the buffer size if .c
+ * indicates a chained packet. If an eDMA descriptor is not chained and
+ * the .hwb bit is not set, this field is ignored and the size is
+ * specified by the .xfer_size field.
+ * 0 = 128 bytes
+ * 1 = 256 bytes
+ * 2 = 512 bytes
+ * 3 = 1024 bytes
+ * 4 = 1664 bytes
+ * 5 = 4096 bytes
+ * 6 = 10368 bytes
+ * 7 = 16384 bytes
+ */
+ uint_reg_t size : 3;
+ /**
+ * Chaining configuration for the buffer. Indicates that an ingress
+ * packet or egress command is chained across multiple buffers, with each
+ * buffer's size indicated by the .size field.
+ */
+ uint_reg_t c : 2;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t c : 2;
+ uint_reg_t size : 3;
+ uint_reg_t hwb : 1;
+ uint_reg_t inst : 2;
+ uint_reg_t __reserved_1 : 3;
+ uint_reg_t stack_idx : 5;
+ uint_reg_t __reserved_0 : 6;
+ int_reg_t va : 42;
+#endif
+
+ };
+
+ /** Word access */
+ uint_reg_t words[2];
+} MPIPE_EDMA_DESC_t;
+
+/**
+ * MPIPE Packet Descriptor.
+ * The packet descriptor is filled by the mPIPE's classification,
+ * load-balancing, and buffer management services. Some fields are consumed
+ * by mPIPE hardware, and others are consumed by Tile software.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+ /* Word 0 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Notification ring into which this packet descriptor is written.
+ * Typically written by load balancer, but can be overridden by
+ * classification program if NR is asserted.
+ */
+ uint_reg_t notif_ring : 8;
+ /** Source channel for this packet. Written by mPIPE DMA hardware. */
+ uint_reg_t channel : 5;
+ /** Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /**
+ * MAC Error.
+ * Generated by the MAC interface. Asserted if there was an overrun of
+ * the MAC's receive FIFO. This condition generally only occurs if the
+ * mPIPE clock is running too slowly.
+ */
+ uint_reg_t me : 1;
+ /**
+ * Truncation Error.
+ * Written by the iDMA hardware. Asserted if packet was truncated due to
+ * insufficient space in iPkt buffer
+ */
+ uint_reg_t tr : 1;
+ /**
+ * Written by the iDMA hardware. Indicates the number of bytes written
+ * to Tile memory. In general, this is the actual size of the packet as
+ * received from the MAC. But if the packet is truncated due to running
+ * out of buffers or due to the iPkt buffer filling up, then the L2_SIZE
+ * will be reduced to reflect the actual number of valid bytes written to
+ * Tile memory.
+ */
+ uint_reg_t l2_size : 14;
+ /**
+ * CRC Error.
+ * Generated by the MAC. Asserted if MAC indicated an L2 CRC error or
+ * other L2 error (bad length etc.) on the packet.
+ */
+ uint_reg_t ce : 1;
+ /**
+ * Cut Through.
+ * Written by the iDMA hardware. Asserted if packet was not completely
+ * received before being sent to classifier. L2_Size will indicate
+ * number of bytes received so far.
+ */
+ uint_reg_t ct : 1;
+ /**
+ * Written by the classification program. Used by the load balancer to
+ * select the ring into which this packet descriptor is written.
+ */
+ uint_reg_t bucket_id : 13;
+ /** Reserved. */
+ uint_reg_t __reserved_1 : 3;
+ /**
+ * Checksum.
+ * Written by classification program. When 1, the checksum engine will
+ * perform checksum based on the CSUM_SEED, CSUM_START, and CSUM_BYTES
+ * fields. The result will be placed in CSUM_VAL.
+ */
+ uint_reg_t cs : 1;
+ /**
+ * Notification Ring Select.
+ * Written by the classification program. When 1, the NotifRingIDX is
+ * set by classification program rather than being set by load balancer.
+ */
+ uint_reg_t nr : 1;
+ /**
+ * Written by classification program. Indicates whether packet and
+ * descriptor should both be dropped, both be delivered, or only the
+ * descriptor should be delivered.
+ */
+ uint_reg_t dest : 2;
+ /**
+ * General Purpose Sequence Number Enable.
+ * Written by the classification program. When 1, the GP_SQN_SEL field
+ * contains the sequence number selector and the GP_SQN field will be
+ * replaced with the associated sequence number. When clear, the GP_SQN
+ * field is left intact and be used as "Custom" bytes.
+ */
+ uint_reg_t sq : 1;
+ /**
+ * TimeStamp Enable.
+ * Enable TimeStamp insertion. When clear, timestamp field may be filled
+ * with custom data by classifier. When set, hardware inserts the
+ * timestamp when the start of packet is received from the MAC.
+ */
+ uint_reg_t ts : 1;
+ /**
+ * Packet Sequence Number Enable.
+ * Enable PacketSQN insertion. When clear, PacketSQN field may be filled
+ * with custom data by classifier. When set, hardware inserts the packet
+ * sequence number when the packet descriptor is written to a
+ * notification ring.
+ */
+ uint_reg_t ps : 1;
+ /**
+ * Buffer Error.
+ * Written by the iDMA hardware. Asserted if iDMA ran out of buffers
+ * while writing the packet. Software must still return any buffer
+ * descriptors whose C field indicates a valid descriptor was consumed.
+ */
+ uint_reg_t be : 1;
+ /**
+ * Written by the classification program. The associated counter is
+ * incremented when the packet is sent.
+ */
+ uint_reg_t ctr0 : 5;
+ /** Reserved. */
+ uint_reg_t __reserved_2 : 3;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 3;
+ uint_reg_t ctr0 : 5;
+ uint_reg_t be : 1;
+ uint_reg_t ps : 1;
+ uint_reg_t ts : 1;
+ uint_reg_t sq : 1;
+ uint_reg_t dest : 2;
+ uint_reg_t nr : 1;
+ uint_reg_t cs : 1;
+ uint_reg_t __reserved_1 : 3;
+ uint_reg_t bucket_id : 13;
+ uint_reg_t ct : 1;
+ uint_reg_t ce : 1;
+ uint_reg_t l2_size : 14;
+ uint_reg_t tr : 1;
+ uint_reg_t me : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t channel : 5;
+ uint_reg_t notif_ring : 8;
+#endif
+
+ /* Word 1 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Written by the classification program. The associated counter is
+ * incremented when the packet is sent.
+ */
+ uint_reg_t ctr1 : 5;
+ /** Reserved. */
+ uint_reg_t __reserved_3 : 3;
+ /**
+ * Written by classification program. Indicates the start byte for
+ * checksum. Relative to 1st byte received from MAC.
+ */
+ uint_reg_t csum_start : 8;
+ /**
+ * Checksum seed written by classification program. Overwritten with
+ * resultant checksum if CS bit is asserted. The endianness of the CSUM
+ * value bits when viewed by Tile software match the packet byte order.
+ * That is, bits[7:0] of the resulting checksum value correspond to
+ * earlier (more significant) bytes in the packet. To avoid classifier
+ * software from having to byte swap the CSUM_SEED, the iDMA checksum
+ * engine byte swaps the classifier's result before seeding the checksum
+ * calculation. Thus, the CSUM_START byte of packet data is added to
+ * bits[15:8] of the CSUM_SEED field generated by the classifier. This
+ * byte swap will be visible to Tile software if the CS bit is clear.
+ */
+ uint_reg_t csum_seed_val : 16;
+ /**
+ * Written by the classification program. Not interpreted by mPIPE
+ * hardware.
+ */
+ uint_reg_t custom0 : 32;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t custom0 : 32;
+ uint_reg_t csum_seed_val : 16;
+ uint_reg_t csum_start : 8;
+ uint_reg_t __reserved_3 : 3;
+ uint_reg_t ctr1 : 5;
+#endif
+
+ /* Word 2 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Written by the classification program. Not interpreted by mPIPE
+ * hardware.
+ */
+ uint_reg_t custom1 : 64;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t custom1 : 64;
+#endif
+
+ /* Word 3 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Written by the classification program. Not interpreted by mPIPE
+ * hardware.
+ */
+ uint_reg_t custom2 : 64;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t custom2 : 64;
+#endif
+
+ /* Word 4 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Written by the classification program. Not interpreted by mPIPE
+ * hardware.
+ */
+ uint_reg_t custom3 : 64;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t custom3 : 64;
+#endif
+
+ /* Word 5 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Sequence number applied when packet is distributed. Classifier
+ * selects which sequence number is to be applied by writing the 13-bit
+ * SQN-selector into this field. For devices that support EXT_SQN (as
+ * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to
+ * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the
+ * PACKET_SQN will be reduced to 32 bits.
+ */
+ uint_reg_t gp_sqn : 16;
+ /**
+ * Written by notification hardware. The packet sequence number is
+ * incremented for each packet that wasn't dropped.
+ */
+ uint_reg_t packet_sqn : 48;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t packet_sqn : 48;
+ uint_reg_t gp_sqn : 16;
+#endif
+
+ /* Word 6 */
+
+#ifndef __BIG_ENDIAN__
+ /**
+ * Written by hardware when the start-of-packet is received by the mPIPE
+ * from the MAC. This is the nanoseconds part of the packet timestamp.
+ */
+ uint_reg_t time_stamp_ns : 32;
+ /**
+ * Written by hardware when the start-of-packet is received by the mPIPE
+ * from the MAC. This is the seconds part of the packet timestamp.
+ */
+ uint_reg_t time_stamp_sec : 32;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t time_stamp_sec : 32;
+ uint_reg_t time_stamp_ns : 32;
+#endif
+
+ /* Word 7 */
+
+#ifndef __BIG_ENDIAN__
+ /** Virtual address. Must be sign extended by consumer. */
+ int_reg_t va : 42;
+ /** Reserved. */
+ uint_reg_t __reserved_4 : 6;
+ /** Index of the buffer stack to which this buffer belongs. */
+ uint_reg_t stack_idx : 5;
+ /** Reserved. */
+ uint_reg_t __reserved_5 : 3;
+ /**
+ * Instance ID. For devices that support automatic buffer return between
+ * mPIPE instances, this field indicates the buffer owner. If the INST
+ * field does not match the mPIPE's instance number when a packet is
+ * egressed, buffers with HWB set will be returned to the other mPIPE
+ * instance. Note that not all devices support multi-mPIPE buffer
+ * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+ * whether the INST field in the buffer descriptor is populated by iDMA
+ * hardware.
+ */
+ uint_reg_t inst : 2;
+ /**
+ * Always set to one by hardware in iDMA packet descriptors. For eDMA,
+ * indicates whether the buffer will be released to the buffer stack
+ * manager. When 0, software is responsible for releasing the buffer.
+ */
+ uint_reg_t hwb : 1;
+ /**
+ * Encoded size of buffer. Set by the ingress hardware for iDMA packet
+ * descriptors. For eDMA descriptors, indicates the buffer size if .c
+ * indicates a chained packet. If an eDMA descriptor is not chained and
+ * the .hwb bit is not set, this field is ignored and the size is
+ * specified by the .xfer_size field.
+ * 0 = 128 bytes
+ * 1 = 256 bytes
+ * 2 = 512 bytes
+ * 3 = 1024 bytes
+ * 4 = 1664 bytes
+ * 5 = 4096 bytes
+ * 6 = 10368 bytes
+ * 7 = 16384 bytes
+ */
+ uint_reg_t size : 3;
+ /**
+ * Chaining configuration for the buffer. Indicates that an ingress
+ * packet or egress command is chained across multiple buffers, with each
+ * buffer's size indicated by the .size field.
+ */
+ uint_reg_t c : 2;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t c : 2;
+ uint_reg_t size : 3;
+ uint_reg_t hwb : 1;
+ uint_reg_t inst : 2;
+ uint_reg_t __reserved_5 : 3;
+ uint_reg_t stack_idx : 5;
+ uint_reg_t __reserved_4 : 6;
+ int_reg_t va : 42;
+#endif
+
+ };
+
+ /** Word access */
+ uint_reg_t words[8];
+} MPIPE_PDESC_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_MPIPE_SHM_H__) */
diff --git a/arch/tile/include/arch/mpipe_shm_def.h b/arch/tile/include/arch/mpipe_shm_def.h
new file mode 100644
index 00000000000..6124d39c831
--- /dev/null
+++ b/arch/tile/include/arch/mpipe_shm_def.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_MPIPE_SHM_DEF_H__
+#define __ARCH_MPIPE_SHM_DEF_H__
+#define MPIPE_EDMA_DESC_WORD1__C_VAL_UNCHAINED 0x0
+#define MPIPE_EDMA_DESC_WORD1__C_VAL_CHAINED 0x1
+#define MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY 0x2
+#define MPIPE_EDMA_DESC_WORD1__C_VAL_INVALID 0x3
+#endif /* !defined(__ARCH_MPIPE_SHM_DEF_H__) */
diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
index f548efeb2de..2de83e7aff3 100644
--- a/arch/tile/include/arch/spr_def.h
+++ b/arch/tile/include/arch/spr_def.h
@@ -11,15 +11,11 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
+#ifndef __ARCH_SPR_DEF_H__
+#define __ARCH_SPR_DEF_H__
-/* Include the proper base SPR definition file. */
-#ifdef __tilegx__
-#include <arch/spr_def_64.h>
-#else
-#include <arch/spr_def_32.h>
-#endif
+#include <uapi/arch/spr_def.h>
-#ifdef __KERNEL__
/*
* In addition to including the proper base SPR definition file, depending
@@ -60,8 +56,8 @@
_concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
#define SPR_IPI_EVENT_RESET_K \
_concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
-#define SPR_IPI_MASK_SET_K \
- _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_SET_K \
+ _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,)
#define INT_IPI_K \
_concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
@@ -110,4 +106,4 @@
#define INT_INTCTRL_K \
_concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,)
-#endif /* __KERNEL__ */
+#endif /* __ARCH_SPR_DEF_H__ */
diff --git a/arch/tile/include/arch/trio.h b/arch/tile/include/arch/trio.h
new file mode 100644
index 00000000000..c0ddedcae08
--- /dev/null
+++ b/arch/tile/include/arch/trio.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_H__
+#define __ARCH_TRIO_H__
+
+#include <arch/abi.h>
+#include <arch/trio_def.h>
+
+#ifndef __ASSEMBLER__
+
+/*
+ * Map SQ Doorbell Format.
+ * This describes the format of the write-only doorbell register that exists
+ * in the last 8-bytes of the MAP_SQ_BASE/LIM range. This register is only
+ * writable from PCIe space. Writes to this register will not be written to
+ * Tile memory space and thus no IO VA translation is required if the last
+ * page of the BASE/LIM range is not otherwise written.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * When written with a 1, the associated MAP_SQ region's doorbell
+ * interrupt will be triggered once all previous writes are visible to
+ * Tile software.
+ */
+ uint_reg_t doorbell : 1;
+ /*
+ * When written with a 1, the descriptor at the head of the associated
+ * MAP_SQ's FIFO will be dequeued.
+ */
+ uint_reg_t pop : 1;
+ /* Reserved. */
+ uint_reg_t __reserved : 62;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 62;
+ uint_reg_t pop : 1;
+ uint_reg_t doorbell : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_MAP_SQ_DOORBELL_FMT_t;
+
+
+/*
+ * Tile PIO Region Configuration - CFG Address Format.
+ * This register describes the address format for PIO accesses when the
+ * associated region is setup with TYPE=CFG.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Register Address (full byte address). */
+ uint_reg_t reg_addr : 12;
+ /* Function Number */
+ uint_reg_t fn : 3;
+ /* Device Number */
+ uint_reg_t dev : 5;
+ /* BUS Number */
+ uint_reg_t bus : 8;
+ /* Config Type: 0 for access to directly-attached device. 1 otherwise. */
+ uint_reg_t type : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /*
+ * MAC select. This must match the configuration in
+ * TILE_PIO_REGION_SETUP.MAC.
+ */
+ uint_reg_t mac : 2;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 32;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 32;
+ uint_reg_t mac : 2;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t type : 1;
+ uint_reg_t bus : 8;
+ uint_reg_t dev : 5;
+ uint_reg_t fn : 3;
+ uint_reg_t reg_addr : 12;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_TRIO_H__) */
diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h
new file mode 100644
index 00000000000..85647e91a45
--- /dev/null
+++ b/arch/tile/include/arch/trio_constants.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+
+#ifndef __ARCH_TRIO_CONSTANTS_H__
+#define __ARCH_TRIO_CONSTANTS_H__
+
+#define TRIO_NUM_ASIDS 32
+#define TRIO_NUM_TLBS_PER_ASID 16
+
+#define TRIO_NUM_TPIO_REGIONS 8
+#define TRIO_LOG2_NUM_TPIO_REGIONS 3
+
+#define TRIO_NUM_MAP_MEM_REGIONS 32
+#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5
+#define TRIO_NUM_MAP_SQ_REGIONS 8
+#define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3
+
+#define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6
+
+#define TRIO_NUM_PUSH_DMA_RINGS 64
+
+#define TRIO_NUM_PULL_DMA_RINGS 64
+
+#endif /* __ARCH_TRIO_CONSTANTS_H__ */
diff --git a/arch/tile/include/arch/trio_def.h b/arch/tile/include/arch/trio_def.h
new file mode 100644
index 00000000000..e80500317dc
--- /dev/null
+++ b/arch/tile/include/arch/trio_def.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_DEF_H__
+#define __ARCH_TRIO_DEF_H__
+#define TRIO_CFG_REGION_ADDR__REG_SHIFT 0
+#define TRIO_CFG_REGION_ADDR__INTFC_SHIFT 16
+#define TRIO_CFG_REGION_ADDR__INTFC_VAL_TRIO 0x0
+#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE 0x1
+#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD 0x2
+#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED 0x3
+#define TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT 18
+#define TRIO_CFG_REGION_ADDR__PROT_SHIFT 20
+#define TRIO_PIO_REGIONS_ADDR__REGION_SHIFT 32
+#define TRIO_MAP_MEM_REG_INT0 0x1000000000
+#define TRIO_MAP_MEM_REG_INT1 0x1000000008
+#define TRIO_MAP_MEM_REG_INT2 0x1000000010
+#define TRIO_MAP_MEM_REG_INT3 0x1000000018
+#define TRIO_MAP_MEM_REG_INT4 0x1000000020
+#define TRIO_MAP_MEM_REG_INT5 0x1000000028
+#define TRIO_MAP_MEM_REG_INT6 0x1000000030
+#define TRIO_MAP_MEM_REG_INT7 0x1000000038
+#define TRIO_MAP_MEM_LIM__ADDR_SHIFT 12
+#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED 0x0
+#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT 0x1
+#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD 0x2
+#define TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT 30
+#endif /* !defined(__ARCH_TRIO_DEF_H__) */
diff --git a/arch/tile/include/arch/trio_pcie_intfc.h b/arch/tile/include/arch/trio_pcie_intfc.h
new file mode 100644
index 00000000000..0487fdb9d58
--- /dev/null
+++ b/arch/tile/include/arch/trio_pcie_intfc.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_PCIE_INTFC_H__
+#define __ARCH_TRIO_PCIE_INTFC_H__
+
+#include <arch/abi.h>
+#include <arch/trio_pcie_intfc_def.h>
+
+#ifndef __ASSEMBLER__
+
+/*
+ * Port Configuration.
+ * Configuration of the PCIe Port
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Provides the state of the strapping pins for this port. */
+ uint_reg_t strap_state : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /*
+ * When 1, the device type will be overridden using OVD_DEV_TYPE_VAL.
+ * When 0, the device type is determined based on the STRAP_STATE.
+ */
+ uint_reg_t ovd_dev_type : 1;
+ /* Provides the device type when OVD_DEV_TYPE is 1. */
+ uint_reg_t ovd_dev_type_val : 4;
+ /* Determines how link is trained. */
+ uint_reg_t train_mode : 2;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 1;
+ /*
+ * For PCIe, used to flip physical RX lanes that were not properly wired.
+ * This is not the same as lane reversal which is handled automatically
+ * during link training. When 0, RX Lane0 must be wired to the link
+ * partner (either to its Lane0 or it's LaneN). When RX_LANE_FLIP is 1,
+ * the highest numbered lane for this port becomes Lane0 and Lane0 does
+ * NOT have to be wired to the link partner.
+ */
+ uint_reg_t rx_lane_flip : 1;
+ /*
+ * For PCIe, used to flip physical TX lanes that were not properly wired.
+ * This is not the same as lane reversal which is handled automatically
+ * during link training. When 0, TX Lane0 must be wired to the link
+ * partner (either to its Lane0 or it's LaneN). When TX_LANE_FLIP is 1,
+ * the highest numbered lane for this port becomes Lane0 and Lane0 does
+ * NOT have to be wired to the link partner.
+ */
+ uint_reg_t tx_lane_flip : 1;
+ /*
+ * For StreamIO port, configures the width of the port when TRAIN_MODE is
+ * not STRAP.
+ */
+ uint_reg_t stream_width : 2;
+ /*
+ * For StreamIO port, configures the rate of the port when TRAIN_MODE is
+ * not STRAP.
+ */
+ uint_reg_t stream_rate : 2;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 46;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 46;
+ uint_reg_t stream_rate : 2;
+ uint_reg_t stream_width : 2;
+ uint_reg_t tx_lane_flip : 1;
+ uint_reg_t rx_lane_flip : 1;
+ uint_reg_t __reserved_1 : 1;
+ uint_reg_t train_mode : 2;
+ uint_reg_t ovd_dev_type_val : 4;
+ uint_reg_t ovd_dev_type : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t strap_state : 3;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_PCIE_INTFC_PORT_CONFIG_t;
+
+/*
+ * Port Status.
+ * Status of the PCIe Port. This register applies to the StreamIO port when
+ * StreamIO is enabled.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Indicates the DL state of the port. When 1, the port is up and ready
+ * to receive traffic.
+ */
+ uint_reg_t dl_up : 1;
+ /*
+ * Indicates the number of times the link has gone down. Clears on read.
+ */
+ uint_reg_t dl_down_cnt : 7;
+ /* Indicates the SERDES PLL has spun up and is providing a valid clock. */
+ uint_reg_t clock_ready : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 7;
+ /* Device revision ID. */
+ uint_reg_t device_rev : 8;
+ /* Link state (PCIe). */
+ uint_reg_t ltssm_state : 6;
+ /* Link power management state (PCIe). */
+ uint_reg_t pm_state : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 31;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 31;
+ uint_reg_t pm_state : 3;
+ uint_reg_t ltssm_state : 6;
+ uint_reg_t device_rev : 8;
+ uint_reg_t __reserved_0 : 7;
+ uint_reg_t clock_ready : 1;
+ uint_reg_t dl_down_cnt : 7;
+ uint_reg_t dl_up : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_PCIE_INTFC_PORT_STATUS_t;
+
+/*
+ * Transmit FIFO Control.
+ * Contains TX FIFO thresholds. These registers are for diagnostics purposes
+ * only. Changing these values causes undefined behavior.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Almost-Empty level for TX0 data. Typically set to at least
+ * roundup(38.0*M/N) where N=tclk frequency and M=MAC symbol rate in MHz
+ * for a x4 port (250MHz).
+ */
+ uint_reg_t tx0_data_ae_lvl : 7;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* Almost-Empty level for TX1 data. */
+ uint_reg_t tx1_data_ae_lvl : 7;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 1;
+ /* Almost-Full level for TX0 data. */
+ uint_reg_t tx0_data_af_lvl : 7;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 1;
+ /* Almost-Full level for TX1 data. */
+ uint_reg_t tx1_data_af_lvl : 7;
+ /* Reserved. */
+ uint_reg_t __reserved_3 : 1;
+ /* Almost-Full level for TX0 info. */
+ uint_reg_t tx0_info_af_lvl : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_4 : 3;
+ /* Almost-Full level for TX1 info. */
+ uint_reg_t tx1_info_af_lvl : 5;
+ /* Reserved. */
+ uint_reg_t __reserved_5 : 3;
+ /*
+ * This register provides performance adjustment for high bandwidth
+ * flows. The MAC will assert almost-full to TRIO if non-posted credits
+ * fall below this level. Note that setting this larger than the initial
+ * PORT_CREDIT.NPH value will cause READS to never be sent. If the
+ * initial credit value from the link partner is smaller than this value
+ * when the link comes up, the value will be reset to the initial credit
+ * value to prevent lockup.
+ */
+ uint_reg_t min_np_credits : 8;
+ /*
+ * This register provides performance adjustment for high bandwidth
+ * flows. The MAC will assert almost-full to TRIO if posted credits fall
+ * below this level. Note that setting this larger than the initial
+ * PORT_CREDIT.PH value will cause WRITES to never be sent. If the
+ * initial credit value from the link partner is smaller than this value
+ * when the link comes up, the value will be reset to the initial credit
+ * value to prevent lockup.
+ */
+ uint_reg_t min_p_credits : 8;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t min_p_credits : 8;
+ uint_reg_t min_np_credits : 8;
+ uint_reg_t __reserved_5 : 3;
+ uint_reg_t tx1_info_af_lvl : 5;
+ uint_reg_t __reserved_4 : 3;
+ uint_reg_t tx0_info_af_lvl : 5;
+ uint_reg_t __reserved_3 : 1;
+ uint_reg_t tx1_data_af_lvl : 7;
+ uint_reg_t __reserved_2 : 1;
+ uint_reg_t tx0_data_af_lvl : 7;
+ uint_reg_t __reserved_1 : 1;
+ uint_reg_t tx1_data_ae_lvl : 7;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t tx0_data_ae_lvl : 7;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_PCIE_INTFC_TX_FIFO_CTL_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_H__) */
diff --git a/arch/tile/include/arch/trio_pcie_intfc_def.h b/arch/tile/include/arch/trio_pcie_intfc_def.h
new file mode 100644
index 00000000000..d3fd6781fb2
--- /dev/null
+++ b/arch/tile/include/arch/trio_pcie_intfc_def.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_PCIE_INTFC_DEF_H__
+#define __ARCH_TRIO_PCIE_INTFC_DEF_H__
+#define TRIO_PCIE_INTFC_MAC_INT_STS 0x0000
+#define TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK 0xf000
+#define TRIO_PCIE_INTFC_PORT_CONFIG 0x0018
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_DISABLED 0x0
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT 0x1
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC 0x2
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 0x3
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1 0x4
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_XLINK 0x5
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X1 0x6
+#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X4 0x7
+#define TRIO_PCIE_INTFC_PORT_STATUS 0x0020
+#define TRIO_PCIE_INTFC_TX_FIFO_CTL 0x0050
+#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_DEF_H__) */
diff --git a/arch/tile/include/arch/trio_pcie_rc.h b/arch/tile/include/arch/trio_pcie_rc.h
new file mode 100644
index 00000000000..6a25d0aca85
--- /dev/null
+++ b/arch/tile/include/arch/trio_pcie_rc.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_PCIE_RC_H__
+#define __ARCH_TRIO_PCIE_RC_H__
+
+#include <arch/abi.h>
+#include <arch/trio_pcie_rc_def.h>
+
+#ifndef __ASSEMBLER__
+
+/* Device Capabilities Register. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Max_Payload_Size Supported, writablethrough the MAC_STANDARD interface
+ */
+ uint_reg_t mps_sup : 3;
+ /*
+ * This field is writable through the MAC_STANDARD interface. However,
+ * Phantom Function is not supported. Therefore, the application must
+ * not write any value other than 0x0 to this field.
+ */
+ uint_reg_t phantom_function_supported : 2;
+ /* This bit is writable through the MAC_STANDARD interface. */
+ uint_reg_t ext_tag_field_supported : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 3;
+ /* Endpoint L1 Acceptable Latency Must be 0x0 for non-Endpoint devices. */
+ uint_reg_t l1_lat : 3;
+ /*
+ * Undefined since PCI Express 1.1 (Was Attention Button Present for PCI
+ * Express 1.0a)
+ */
+ uint_reg_t r1 : 1;
+ /*
+ * Undefined since PCI Express 1.1 (Was Attention Indicator Present for
+ * PCI Express 1.0a)
+ */
+ uint_reg_t r2 : 1;
+ /*
+ * Undefined since PCI Express 1.1 (Was Power Indicator Present for PCI
+ * Express 1.0a)
+ */
+ uint_reg_t r3 : 1;
+ /*
+ * Role-Based Error Reporting, writable through the MAC_STANDARD
+ * interface. Required to be set for device compliant to 1.1 spec and
+ * later.
+ */
+ uint_reg_t rer : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 2;
+ /* Captured Slot Power Limit Value Upstream port only. */
+ uint_reg_t slot_pwr_lim : 8;
+ /* Captured Slot Power Limit Scale Upstream port only. */
+ uint_reg_t slot_pwr_scale : 2;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 4;
+ /* Endpoint L0s Acceptable LatencyMust be 0x0 for non-Endpoint devices. */
+ uint_reg_t l0s_lat : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_3 : 31;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_3 : 31;
+ uint_reg_t l0s_lat : 1;
+ uint_reg_t __reserved_2 : 4;
+ uint_reg_t slot_pwr_scale : 2;
+ uint_reg_t slot_pwr_lim : 8;
+ uint_reg_t __reserved_1 : 2;
+ uint_reg_t rer : 1;
+ uint_reg_t r3 : 1;
+ uint_reg_t r2 : 1;
+ uint_reg_t r1 : 1;
+ uint_reg_t l1_lat : 3;
+ uint_reg_t __reserved_0 : 3;
+ uint_reg_t ext_tag_field_supported : 1;
+ uint_reg_t phantom_function_supported : 2;
+ uint_reg_t mps_sup : 3;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_PCIE_RC_DEVICE_CAP_t;
+
+/* Device Control Register. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Correctable Error Reporting Enable */
+ uint_reg_t cor_err_ena : 1;
+ /* Non-Fatal Error Reporting Enable */
+ uint_reg_t nf_err_ena : 1;
+ /* Fatal Error Reporting Enable */
+ uint_reg_t fatal_err_ena : 1;
+ /* Unsupported Request Reporting Enable */
+ uint_reg_t ur_ena : 1;
+ /* Relaxed orderring enable */
+ uint_reg_t ro_ena : 1;
+ /* Max Payload Size */
+ uint_reg_t max_payload_size : 3;
+ /* Extended Tag Field Enable */
+ uint_reg_t ext_tag : 1;
+ /* Phantom Function Enable */
+ uint_reg_t ph_fn_ena : 1;
+ /* AUX Power PM Enable */
+ uint_reg_t aux_pm_ena : 1;
+ /* Enable NoSnoop */
+ uint_reg_t no_snoop : 1;
+ /* Max read request size */
+ uint_reg_t max_read_req_sz : 3;
+ /* Reserved. */
+ uint_reg_t __reserved : 49;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 49;
+ uint_reg_t max_read_req_sz : 3;
+ uint_reg_t no_snoop : 1;
+ uint_reg_t aux_pm_ena : 1;
+ uint_reg_t ph_fn_ena : 1;
+ uint_reg_t ext_tag : 1;
+ uint_reg_t max_payload_size : 3;
+ uint_reg_t ro_ena : 1;
+ uint_reg_t ur_ena : 1;
+ uint_reg_t fatal_err_ena : 1;
+ uint_reg_t nf_err_ena : 1;
+ uint_reg_t cor_err_ena : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_PCIE_RC_DEVICE_CONTROL_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_TRIO_PCIE_RC_H__) */
diff --git a/arch/tile/include/arch/trio_pcie_rc_def.h b/arch/tile/include/arch/trio_pcie_rc_def.h
new file mode 100644
index 00000000000..74081a65b6f
--- /dev/null
+++ b/arch/tile/include/arch/trio_pcie_rc_def.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_PCIE_RC_DEF_H__
+#define __ARCH_TRIO_PCIE_RC_DEF_H__
+#define TRIO_PCIE_RC_DEVICE_CAP 0x0074
+#define TRIO_PCIE_RC_DEVICE_CONTROL 0x0078
+#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID 0x0000
+#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT 16
+#define TRIO_PCIE_RC_REVISION_ID 0x0008
+#endif /* !defined(__ARCH_TRIO_PCIE_RC_DEF_H__) */
diff --git a/arch/tile/include/arch/trio_shm.h b/arch/tile/include/arch/trio_shm.h
new file mode 100644
index 00000000000..3382e38245a
--- /dev/null
+++ b/arch/tile/include/arch/trio_shm.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+
+#ifndef __ARCH_TRIO_SHM_H__
+#define __ARCH_TRIO_SHM_H__
+
+#include <arch/abi.h>
+#include <arch/trio_shm_def.h>
+
+#ifndef __ASSEMBLER__
+/**
+ * TRIO DMA Descriptor.
+ * The TRIO DMA descriptor is written by software and consumed by hardware.
+ * It is used to specify the location of transaction data in the IO and Tile
+ * domains.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+ /* Word 0 */
+
+#ifndef __BIG_ENDIAN__
+ /** Tile side virtual address. */
+ int_reg_t va : 42;
+ /**
+ * Encoded size of buffer used on push DMA when C=1:
+ * 0 = 128 bytes
+ * 1 = 256 bytes
+ * 2 = 512 bytes
+ * 3 = 1024 bytes
+ * 4 = 1664 bytes
+ * 5 = 4096 bytes
+ * 6 = 10368 bytes
+ * 7 = 16384 bytes
+ */
+ uint_reg_t bsz : 3;
+ /**
+ * Chaining designation. Always zero for pull DMA
+ * 0 : Unchained buffer pointer
+ * 1 : Chained buffer pointer. Next buffer descriptor (e.g. VA) stored
+ * in 1st 8-bytes in buffer. For chained buffers, first 8-bytes of each
+ * buffer contain the next buffer descriptor formatted exactly like a PDE
+ * buffer descriptor. This allows a chained PDE buffer to be sent using
+ * push DMA.
+ */
+ uint_reg_t c : 1;
+ /**
+ * Notification interrupt will be delivered when the transaction has
+ * completed (all data has been read from or written to the Tile-side
+ * buffer).
+ */
+ uint_reg_t notif : 1;
+ /**
+ * When 0, the XSIZE field specifies the total byte count for the
+ * transaction. When 1, the XSIZE field is encoded as 2^(N+14) for N in
+ * {0..6}:
+ * 0 = 16KB
+ * 1 = 32KB
+ * 2 = 64KB
+ * 3 = 128KB
+ * 4 = 256KB
+ * 5 = 512KB
+ * 6 = 1MB
+ * All other encodings of the XSIZE field are reserved when SMOD=1
+ */
+ uint_reg_t smod : 1;
+ /**
+ * Total number of bytes to move for this transaction. When SMOD=1,
+ * this field is encoded - see SMOD description.
+ */
+ uint_reg_t xsize : 14;
+ /** Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /**
+ * Generation number. Used to indicate a valid descriptor in ring. When
+ * a new descriptor is written into the ring, software must toggle this
+ * bit. The net effect is that the GEN bit being written into new
+ * descriptors toggles each time the ring tail pointer wraps.
+ */
+ uint_reg_t gen : 1;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t gen : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t xsize : 14;
+ uint_reg_t smod : 1;
+ uint_reg_t notif : 1;
+ uint_reg_t c : 1;
+ uint_reg_t bsz : 3;
+ int_reg_t va : 42;
+#endif
+
+ /* Word 1 */
+
+#ifndef __BIG_ENDIAN__
+ /** IO-side address */
+ uint_reg_t io_address : 64;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t io_address : 64;
+#endif
+
+ };
+
+ /** Word access */
+ uint_reg_t words[2];
+} TRIO_DMA_DESC_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_TRIO_SHM_H__) */
diff --git a/arch/tile/include/arch/trio_shm_def.h b/arch/tile/include/arch/trio_shm_def.h
new file mode 100644
index 00000000000..72a59c88b06
--- /dev/null
+++ b/arch/tile/include/arch/trio_shm_def.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_TRIO_SHM_DEF_H__
+#define __ARCH_TRIO_SHM_DEF_H__
+#endif /* !defined(__ARCH_TRIO_SHM_DEF_H__) */
diff --git a/arch/tile/include/arch/uart.h b/arch/tile/include/arch/uart.h
new file mode 100644
index 00000000000..07966970ada
--- /dev/null
+++ b/arch/tile/include/arch/uart.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_UART_H__
+#define __ARCH_UART_H__
+
+#include <arch/abi.h>
+#include <arch/uart_def.h>
+
+#ifndef __ASSEMBLER__
+
+/* Divisor. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Baud Rate Divisor. Desired_baud_rate = REF_CLK frequency / (baud *
+ * 16).
+ * Note: REF_CLK is always 125 MHz, the default
+ * divisor = 68, baud rate = 125M/(68*16) = 115200 baud.
+ */
+ uint_reg_t divisor : 12;
+ /* Reserved. */
+ uint_reg_t __reserved : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 52;
+ uint_reg_t divisor : 12;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_DIVISOR_t;
+
+/* FIFO Count. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * n: n active entries in the receive FIFO (max is 2**8). Each entry has
+ * 8 bits.
+ * 0: no active entry in the receive FIFO (that is empty).
+ */
+ uint_reg_t rfifo_count : 9;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 7;
+ /*
+ * n: n active entries in the transmit FIFO (max is 2**8). Each entry has
+ * 8 bits.
+ * 0: no active entry in the transmit FIFO (that is empty).
+ */
+ uint_reg_t tfifo_count : 9;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 7;
+ /*
+ * n: n active entries in the write FIFO (max is 2**2). Each entry has 8
+ * bits.
+ * 0: no active entry in the write FIFO (that is empty).
+ */
+ uint_reg_t wfifo_count : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 29;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 29;
+ uint_reg_t wfifo_count : 3;
+ uint_reg_t __reserved_1 : 7;
+ uint_reg_t tfifo_count : 9;
+ uint_reg_t __reserved_0 : 7;
+ uint_reg_t rfifo_count : 9;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_FIFO_COUNT_t;
+
+/* FLAG. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* 1: receive FIFO is empty */
+ uint_reg_t rfifo_empty : 1;
+ /* 1: write FIFO is empty. */
+ uint_reg_t wfifo_empty : 1;
+ /* 1: transmit FIFO is empty. */
+ uint_reg_t tfifo_empty : 1;
+ /* 1: receive FIFO is full. */
+ uint_reg_t rfifo_full : 1;
+ /* 1: write FIFO is full. */
+ uint_reg_t wfifo_full : 1;
+ /* 1: transmit FIFO is full. */
+ uint_reg_t tfifo_full : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 57;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 57;
+ uint_reg_t tfifo_full : 1;
+ uint_reg_t wfifo_full : 1;
+ uint_reg_t rfifo_full : 1;
+ uint_reg_t tfifo_empty : 1;
+ uint_reg_t wfifo_empty : 1;
+ uint_reg_t rfifo_empty : 1;
+ uint_reg_t __reserved_0 : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_FLAG_t;
+
+/*
+ * Interrupt Vector Mask.
+ * Each bit in this register corresponds to a specific interrupt. When set,
+ * the associated interrupt will not be dispatched.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Read data FIFO read and no data available */
+ uint_reg_t rdat_err : 1;
+ /* Write FIFO was written but it was full */
+ uint_reg_t wdat_err : 1;
+ /* Stop bit not found when current data was received */
+ uint_reg_t frame_err : 1;
+ /* Parity error was detected when current data was received */
+ uint_reg_t parity_err : 1;
+ /* Data was received but the receive FIFO was full */
+ uint_reg_t rfifo_overflow : 1;
+ /*
+ * An almost full event is reached when data is to be written to the
+ * receive FIFO, and the receive FIFO has more than or equal to
+ * BUFFER_THRESHOLD.RFIFO_AFULL bytes.
+ */
+ uint_reg_t rfifo_afull : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* An entry in the transmit FIFO was popped */
+ uint_reg_t tfifo_re : 1;
+ /* An entry has been pushed into the receive FIFO */
+ uint_reg_t rfifo_we : 1;
+ /* An entry of the write FIFO has been popped */
+ uint_reg_t wfifo_re : 1;
+ /* Rshim read receive FIFO in protocol mode */
+ uint_reg_t rfifo_err : 1;
+ /*
+ * An almost empty event is reached when data is to be read from the
+ * transmit FIFO, and the transmit FIFO has less than or equal to
+ * BUFFER_THRESHOLD.TFIFO_AEMPTY bytes.
+ */
+ uint_reg_t tfifo_aempty : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 52;
+ uint_reg_t tfifo_aempty : 1;
+ uint_reg_t rfifo_err : 1;
+ uint_reg_t wfifo_re : 1;
+ uint_reg_t rfifo_we : 1;
+ uint_reg_t tfifo_re : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t rfifo_afull : 1;
+ uint_reg_t rfifo_overflow : 1;
+ uint_reg_t parity_err : 1;
+ uint_reg_t frame_err : 1;
+ uint_reg_t wdat_err : 1;
+ uint_reg_t rdat_err : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_INTERRUPT_MASK_t;
+
+/*
+ * Interrupt vector, write-one-to-clear.
+ * Each bit in this register corresponds to a specific interrupt. Hardware
+ * sets the bit when the associated condition has occurred. Writing a 1
+ * clears the status bit.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Read data FIFO read and no data available */
+ uint_reg_t rdat_err : 1;
+ /* Write FIFO was written but it was full */
+ uint_reg_t wdat_err : 1;
+ /* Stop bit not found when current data was received */
+ uint_reg_t frame_err : 1;
+ /* Parity error was detected when current data was received */
+ uint_reg_t parity_err : 1;
+ /* Data was received but the receive FIFO was full */
+ uint_reg_t rfifo_overflow : 1;
+ /*
+ * Data was received and the receive FIFO is now almost full (more than
+ * BUFFER_THRESHOLD.RFIFO_AFULL bytes in it)
+ */
+ uint_reg_t rfifo_afull : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* An entry in the transmit FIFO was popped */
+ uint_reg_t tfifo_re : 1;
+ /* An entry has been pushed into the receive FIFO */
+ uint_reg_t rfifo_we : 1;
+ /* An entry of the write FIFO has been popped */
+ uint_reg_t wfifo_re : 1;
+ /* Rshim read receive FIFO in protocol mode */
+ uint_reg_t rfifo_err : 1;
+ /*
+ * Data was read from the transmit FIFO and now it is almost empty (less
+ * than or equal to BUFFER_THRESHOLD.TFIFO_AEMPTY bytes in it).
+ */
+ uint_reg_t tfifo_aempty : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 52;
+ uint_reg_t tfifo_aempty : 1;
+ uint_reg_t rfifo_err : 1;
+ uint_reg_t wfifo_re : 1;
+ uint_reg_t rfifo_we : 1;
+ uint_reg_t tfifo_re : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t rfifo_afull : 1;
+ uint_reg_t rfifo_overflow : 1;
+ uint_reg_t parity_err : 1;
+ uint_reg_t frame_err : 1;
+ uint_reg_t wdat_err : 1;
+ uint_reg_t rdat_err : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_INTERRUPT_STATUS_t;
+
+/* Type. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Number of stop bits, rx and tx */
+ uint_reg_t sbits : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* Data word size, rx and tx */
+ uint_reg_t dbits : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 1;
+ /* Parity selection, rx and tx */
+ uint_reg_t ptype : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 57;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 57;
+ uint_reg_t ptype : 3;
+ uint_reg_t __reserved_1 : 1;
+ uint_reg_t dbits : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t sbits : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_TYPE_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_UART_H__) */
diff --git a/arch/tile/include/arch/uart_def.h b/arch/tile/include/arch/uart_def.h
new file mode 100644
index 00000000000..42bcaf53537
--- /dev/null
+++ b/arch/tile/include/arch/uart_def.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_UART_DEF_H__
+#define __ARCH_UART_DEF_H__
+#define UART_DIVISOR 0x0158
+#define UART_FIFO_COUNT 0x0110
+#define UART_FLAG 0x0108
+#define UART_INTERRUPT_MASK 0x0208
+#define UART_INTERRUPT_MASK__RDAT_ERR_SHIFT 0
+#define UART_INTERRUPT_MASK__RDAT_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__RDAT_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RDAT_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__RDAT_ERR_MASK 0x1
+#define UART_INTERRUPT_MASK__RDAT_ERR_FIELD 0,0
+#define UART_INTERRUPT_MASK__WDAT_ERR_SHIFT 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__WDAT_ERR_MASK 0x2
+#define UART_INTERRUPT_MASK__WDAT_ERR_FIELD 1,1
+#define UART_INTERRUPT_MASK__FRAME_ERR_SHIFT 2
+#define UART_INTERRUPT_MASK__FRAME_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__FRAME_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__FRAME_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__FRAME_ERR_MASK 0x4
+#define UART_INTERRUPT_MASK__FRAME_ERR_FIELD 2,2
+#define UART_INTERRUPT_MASK__PARITY_ERR_SHIFT 3
+#define UART_INTERRUPT_MASK__PARITY_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__PARITY_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__PARITY_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__PARITY_ERR_MASK 0x8
+#define UART_INTERRUPT_MASK__PARITY_ERR_FIELD 3,3
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_SHIFT 4
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_MASK 0x10
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_FIELD 4,4
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_SHIFT 5
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_MASK 0x20
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_FIELD 5,5
+#define UART_INTERRUPT_MASK__TFIFO_RE_SHIFT 7
+#define UART_INTERRUPT_MASK__TFIFO_RE_WIDTH 1
+#define UART_INTERRUPT_MASK__TFIFO_RE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__TFIFO_RE_RMASK 0x1
+#define UART_INTERRUPT_MASK__TFIFO_RE_MASK 0x80
+#define UART_INTERRUPT_MASK__TFIFO_RE_FIELD 7,7
+#define UART_INTERRUPT_MASK__RFIFO_WE_SHIFT 8
+#define UART_INTERRUPT_MASK__RFIFO_WE_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_WE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_WE_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_WE_MASK 0x100
+#define UART_INTERRUPT_MASK__RFIFO_WE_FIELD 8,8
+#define UART_INTERRUPT_MASK__WFIFO_RE_SHIFT 9
+#define UART_INTERRUPT_MASK__WFIFO_RE_WIDTH 1
+#define UART_INTERRUPT_MASK__WFIFO_RE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__WFIFO_RE_RMASK 0x1
+#define UART_INTERRUPT_MASK__WFIFO_RE_MASK 0x200
+#define UART_INTERRUPT_MASK__WFIFO_RE_FIELD 9,9
+#define UART_INTERRUPT_MASK__RFIFO_ERR_SHIFT 10
+#define UART_INTERRUPT_MASK__RFIFO_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_MASK 0x400
+#define UART_INTERRUPT_MASK__RFIFO_ERR_FIELD 10,10
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_SHIFT 11
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_WIDTH 1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RESET_VAL 1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RMASK 0x1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_MASK 0x800
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_FIELD 11,11
+#define UART_INTERRUPT_STATUS 0x0200
+#define UART_RECEIVE_DATA 0x0148
+#define UART_TRANSMIT_DATA 0x0140
+#define UART_TYPE 0x0160
+#define UART_TYPE__SBITS_SHIFT 0
+#define UART_TYPE__SBITS_WIDTH 1
+#define UART_TYPE__SBITS_RESET_VAL 1
+#define UART_TYPE__SBITS_RMASK 0x1
+#define UART_TYPE__SBITS_MASK 0x1
+#define UART_TYPE__SBITS_FIELD 0,0
+#define UART_TYPE__SBITS_VAL_ONE_SBITS 0x0
+#define UART_TYPE__SBITS_VAL_TWO_SBITS 0x1
+#define UART_TYPE__DBITS_SHIFT 2
+#define UART_TYPE__DBITS_WIDTH 1
+#define UART_TYPE__DBITS_RESET_VAL 0
+#define UART_TYPE__DBITS_RMASK 0x1
+#define UART_TYPE__DBITS_MASK 0x4
+#define UART_TYPE__DBITS_FIELD 2,2
+#define UART_TYPE__DBITS_VAL_EIGHT_DBITS 0x0
+#define UART_TYPE__DBITS_VAL_SEVEN_DBITS 0x1
+#define UART_TYPE__PTYPE_SHIFT 4
+#define UART_TYPE__PTYPE_WIDTH 3
+#define UART_TYPE__PTYPE_RESET_VAL 3
+#define UART_TYPE__PTYPE_RMASK 0x7
+#define UART_TYPE__PTYPE_MASK 0x70
+#define UART_TYPE__PTYPE_FIELD 4,6
+#define UART_TYPE__PTYPE_VAL_NONE 0x0
+#define UART_TYPE__PTYPE_VAL_MARK 0x1
+#define UART_TYPE__PTYPE_VAL_SPACE 0x2
+#define UART_TYPE__PTYPE_VAL_EVEN 0x3
+#define UART_TYPE__PTYPE_VAL_ODD 0x4
+#endif /* !defined(__ARCH_UART_DEF_H__) */
diff --git a/arch/tile/include/arch/usb_host.h b/arch/tile/include/arch/usb_host.h
new file mode 100644
index 00000000000..d09f3268396
--- /dev/null
+++ b/arch/tile/include/arch/usb_host.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_USB_HOST_H__
+#define __ARCH_USB_HOST_H__
+
+#include <arch/abi.h>
+#include <arch/usb_host_def.h>
+
+#ifndef __ASSEMBLER__
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_USB_HOST_H__) */
diff --git a/arch/tile/include/arch/usb_host_def.h b/arch/tile/include/arch/usb_host_def.h
new file mode 100644
index 00000000000..aeed7753e8e
--- /dev/null
+++ b/arch/tile/include/arch/usb_host_def.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_USB_HOST_DEF_H__
+#define __ARCH_USB_HOST_DEF_H__
+#endif /* !defined(__ARCH_USB_HOST_DEF_H__) */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 0bb42642343..0aa5675e702 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,33 +1,32 @@
-include include/asm-generic/Kbuild.asm
header-y += ../arch/
-header-y += ucontext.h
-header-y += hardwall.h
-
generic-y += bug.h
generic-y += bugs.h
+generic-y += clkdev.h
generic-y += cputime.h
-generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
+generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
+generic-y += hash.h
+generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
-generic-y += ipc.h
generic-y += ipcbuf.h
generic-y += irq_regs.h
-generic-y += kdebug.h
generic-y += local.h
-generic-y += module.h
+generic-y += local64.h
+generic-y += mcs_spinlock.h
generic-y += msgbuf.h
generic-y += mutex.h
generic-y += param.h
generic-y += parport.h
generic-y += poll.h
generic-y += posix_types.h
+generic-y += preempt.h
generic-y += resource.h
generic-y += scatterlist.h
generic-y += sembuf.h
@@ -39,6 +38,6 @@ generic-y += sockios.h
generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
+generic-y += trace_clock.h
generic-y += types.h
-generic-y += ucontext.h
generic-y += xor.h
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 921dbeb8a70..70979846076 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -17,10 +17,12 @@
#ifndef _ASM_TILE_ATOMIC_H
#define _ASM_TILE_ATOMIC_H
+#include <asm/cmpxchg.h>
+
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
-#include <asm/system.h>
+#include <linux/types.h>
#define ATOMIC_INIT(i) { (i) }
@@ -112,6 +114,32 @@ static inline int atomic_read(const atomic_t *v)
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
/**
+ * atomic_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+ return xchg(&v->counter, n);
+}
+
+/**
+ * atomic_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+ return cmpxchg(&v->counter, o, n);
+}
+
+/**
* atomic_add_negative - add and test if negative
* @v: pointer of type atomic_t
* @i: integer value to add
@@ -121,54 +149,6 @@ static inline int atomic_read(const atomic_t *v)
*/
#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0)
-/* Nonexistent functions intended to cause link errors. */
-extern unsigned long __xchg_called_with_bad_pointer(void);
-extern unsigned long __cmpxchg_called_with_bad_pointer(void);
-
-#define xchg(ptr, x) \
- ({ \
- typeof(*(ptr)) __x; \
- switch (sizeof(*(ptr))) { \
- case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((x)-(x)))(x)); \
- break; \
- case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((x)-(x)))(x)); \
- break; \
- default: \
- __xchg_called_with_bad_pointer(); \
- } \
- __x; \
- })
-
-#define cmpxchg(ptr, o, n) \
- ({ \
- typeof(*(ptr)) __x; \
- switch (sizeof(*(ptr))) { \
- case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((o)-(o)))(o), \
- (u32)(typeof((n)-(n)))(n)); \
- break; \
- case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((o)-(o)))(o), \
- (u64)(typeof((n)-(n)))(n)); \
- break; \
- default: \
- __cmpxchg_called_with_bad_pointer(); \
- } \
- __x; \
- })
-
-#define tas(ptr) (xchg((ptr), 1))
-
#endif /* __ASSEMBLY__ */
#ifndef __tilegx__
@@ -177,4 +157,52 @@ extern unsigned long __cmpxchg_called_with_bad_pointer(void);
#include <asm/atomic_64.h>
#endif
+#ifndef __ASSEMBLY__
+
+/**
+ * atomic64_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic64_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline long long atomic64_xchg(atomic64_t *v, long long n)
+{
+ return xchg64(&v->counter, n);
+}
+
+/**
+ * atomic64_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic64_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline long long atomic64_cmpxchg(atomic64_t *v, long long o,
+ long long n)
+{
+ return cmpxchg64(&v->counter, o, n);
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ long long c, old, dec;
+
+ c = atomic64_read(v);
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic64_cmpxchg((v), c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_TILE_ATOMIC_H */
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index c03349e0ca9..1b109fad9ff 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -17,44 +17,11 @@
#ifndef _ASM_TILE_ATOMIC_32_H
#define _ASM_TILE_ATOMIC_32_H
+#include <asm/barrier.h>
#include <arch/chip.h>
#ifndef __ASSEMBLY__
-/* Tile-specific routines to support <linux/atomic.h>. */
-int _atomic_xchg(atomic_t *v, int n);
-int _atomic_xchg_add(atomic_t *v, int i);
-int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
-int _atomic_cmpxchg(atomic_t *v, int o, int n);
-
-/**
- * atomic_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg(v, n);
-}
-
-/**
- * atomic_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_cmpxchg(v, o, n);
-}
-
/**
* atomic_add - add integer to atomic variable
* @i: integer value to add
@@ -64,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
*/
static inline void atomic_add(int i, atomic_t *v)
{
- _atomic_xchg_add(v, i);
+ _atomic_xchg_add(&v->counter, i);
}
/**
@@ -77,7 +44,7 @@ static inline void atomic_add(int i, atomic_t *v)
static inline int atomic_add_return(int i, atomic_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add(v, i) + i;
+ return _atomic_xchg_add(&v->counter, i) + i;
}
/**
@@ -92,7 +59,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add_unless(v, a, u);
+ return _atomic_xchg_add_unless(&v->counter, a, u);
}
/**
@@ -107,64 +74,31 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
static inline void atomic_set(atomic_t *v, int n)
{
- _atomic_xchg(v, n);
+ _atomic_xchg(&v->counter, n);
}
/* A 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ long long counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
-u64 _atomic64_xchg(atomic64_t *v, u64 n);
-u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
-u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
-u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
-
/**
* atomic64_read - read atomic variable
* @v: pointer of type atomic64_t
*
* Atomically reads the value of @v.
*/
-static inline u64 atomic64_read(const atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
{
/*
* Requires an atomic op to read both 32-bit parts consistently.
* Casting away const is safe since the atomic support routines
* do not write to memory if the value has not been modified.
*/
- return _atomic64_xchg_add((atomic64_t *)v, 0);
-}
-
-/**
- * atomic64_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic64_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg(v, n);
-}
-
-/**
- * atomic64_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic64_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_cmpxchg(v, o, n);
+ return _atomic64_xchg_add((long long *)&v->counter, 0);
}
/**
@@ -174,9 +108,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
*
* Atomically adds @i to @v.
*/
-static inline void atomic64_add(u64 i, atomic64_t *v)
+static inline void atomic64_add(long long i, atomic64_t *v)
{
- _atomic64_xchg_add(v, i);
+ _atomic64_xchg_add(&v->counter, i);
}
/**
@@ -186,10 +120,10 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add(v, i) + i;
+ return _atomic64_xchg_add(&v->counter, i) + i;
}
/**
@@ -199,12 +133,13 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
+ * Returns non-zero if @v was not @u, and zero otherwise.
*/
-static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+static inline long long atomic64_add_unless(atomic64_t *v, long long a,
+ long long u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add_unless(v, a, u) != u;
+ return _atomic64_xchg_add_unless(&v->counter, a, u) != u;
}
/**
@@ -217,9 +152,9 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
* atomic64_set() can't be just a raw store, since it would be lost if it
* fell between the load and store of one of the other atomic ops.
*/
-static inline void atomic64_set(atomic64_t *v, u64 n)
+static inline void atomic64_set(atomic64_t *v, long long n)
{
- _atomic64_xchg(v, n);
+ _atomic64_xchg(&v->counter, n);
}
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
@@ -234,16 +169,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
-/*
- * We need to barrier before modifying the word, since the _atomic_xxx()
- * routines just tns the lock and then read/modify/write of the word.
- * But after the word is updated, the routine issues an "mf" before returning,
- * and since it's a function call, we don't even need a compiler barrier.
- */
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_dec() do { } while (0)
-#define smp_mb__after_atomic_inc() do { } while (0)
#endif /* !__ASSEMBLY__ */
@@ -251,21 +176,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
* Internal definitions only beyond this point.
*/
-#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
- (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
-
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/* Number of entries in atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L1_SHIFT 6
-#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
-
-/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
-#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* Number of atomic locks in atomic_locks[]. Must be a power of two.
* There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -280,8 +190,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
extern int atomic_locks[];
#endif
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* All the code that may fault while holding an atomic lock must
* place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
@@ -302,7 +210,14 @@ void __init_atomic_per_cpu(void);
void __atomic_fault_unlock(int *lock_ptr);
#endif
+/* Return a pointer to the lock for the given address. */
+int *__atomic_hashed_lock(volatile void *v);
+
/* Private helper routines in lib/atomic_asm_32.S */
+struct __get_user {
+ unsigned long val;
+ int err;
+};
extern struct __get_user __atomic_cmpxchg(volatile int *p,
int *lock, int o, int n);
extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
@@ -312,11 +227,16 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
-extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
-extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
- int *lock, u64 o, u64 n);
+extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
+ long long o, long long n);
+extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
+ long long n);
+extern long long __atomic64_xchg_add_unless(volatile long long *p,
+ int *lock, long long o, long long n);
+
+/* Return failure from the atomic wrappers. */
+struct __get_user __atomic_bad_address(int __user *addr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 27fe667fddf..7b11c5fadd4 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -19,6 +19,7 @@
#ifndef __ASSEMBLY__
+#include <asm/barrier.h>
#include <arch/spr_def.h>
/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
@@ -31,25 +32,6 @@
* on any routine which updates memory and returns a value.
*/
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- int val;
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- smp_mb(); /* barrier for proper semantics */
- val = __insn_cmpexch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- int val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic_add(int i, atomic_t *v)
{
__insn_fetchadd4((void *)&v->counter, i);
@@ -71,7 +53,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval;
}
@@ -83,25 +65,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define atomic64_read(v) ((v)->counter)
#define atomic64_set(v, i) ((v)->counter = (i))
-static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- val = __insn_cmpexch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline long atomic64_xchg(atomic64_t *v, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic64_add(long i, atomic64_t *v)
{
__insn_fetchadd((void *)&v->counter, i);
@@ -123,7 +86,7 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic64_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval != u;
}
@@ -142,12 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-/* Atomic dec and inc don't implement barrier, so provide them if needed. */
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__after_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_inc() smp_mb()
-
/* Define this to indicate that cmpxchg is an efficient operation. */
#define __HAVE_ARCH_CMPXCHG
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
new file mode 100644
index 00000000000..96a42ae79f4
--- /dev/null
+++ b/arch/tile/include/asm/barrier.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BARRIER_H
+#define _ASM_TILE_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+#include <asm/timex.h>
+
+#define __sync() __insn_mf()
+
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+ long clobber_r10;
+ asm volatile("swint2"
+ : "=R10" (clobber_r10)
+ : "R10" (HV_SYS_fence_incoherent)
+ : "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "r8", "r9",
+ "r11", "r12", "r13", "r14",
+ "r15", "r16", "r17", "r18", "r19",
+ "r20", "r21", "r22", "r23", "r24",
+ "r25", "r26", "r27", "r28", "r29");
+}
+
+/* Fence to guarantee visibility of stores to incoherent memory. */
+static inline void
+mb_incoherent(void)
+{
+ __insn_mf();
+
+ {
+#if CHIP_HAS_TILE_WRITE_PENDING()
+ const unsigned long WRITE_TIMEOUT_CYCLES = 400;
+ unsigned long start = get_cycles_low();
+ do {
+ if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
+ return;
+ } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
+#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
+ (void) __mb_incoherent();
+ }
+}
+
+#define fast_wmb() __sync()
+#define fast_rmb() __sync()
+#define fast_mb() __sync()
+#define fast_iob() mb_incoherent()
+
+#define wmb() fast_wmb()
+#define rmb() fast_rmb()
+#define mb() fast_mb()
+#define iob() fast_iob()
+
+#ifndef __tilegx__ /* 32 bit */
+/*
+ * We need to barrier before modifying the word, since the _atomic_xxx()
+ * routines just tns the lock and then read/modify/write of the word.
+ * But after the word is updated, the routine issues an "mf" before returning,
+ * and since it's a function call, we don't even need a compiler barrier.
+ */
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() do { } while (0)
+#else /* 64 bit */
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() smp_mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 16f1fa51fea..20caa346ac0 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -17,6 +17,7 @@
#define _ASM_TILE_BITOPS_H
#include <linux/types.h>
+#include <asm/barrier.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
@@ -29,17 +30,6 @@
#endif
/**
- * __ffs - find first set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- return __builtin_ctzl(word);
-}
-
-/**
* ffz - find first zero bit in word
* @word: The word to search
*
@@ -50,31 +40,9 @@ static inline unsigned long ffz(unsigned long word)
return __builtin_ctzl(~word);
}
-/**
- * __fls - find last set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __fls(unsigned long word)
-{
- return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
-}
-
-/**
- * ffs - find first set bit in word
- * @x: the word to search
- *
- * This is defined the same way as the libc and compiler builtin ffs
- * routines, therefore differs in spirit from the other bitops.
- *
- * ffs(value) returns 0 if value is 0 or the position of the first
- * set bit if value is nonzero. The first (least significant) bit
- * is at position 1.
- */
-static inline int ffs(int x)
+static inline int fls64(__u64 w)
{
- return __builtin_ffs(x);
+ return (sizeof(__u64) * 8) - __builtin_clzll(w);
}
/**
@@ -90,12 +58,7 @@ static inline int ffs(int x)
*/
static inline int fls(int x)
{
- return (sizeof(int) * 8) - __builtin_clz(x);
-}
-
-static inline int fls64(__u64 w)
-{
- return (sizeof(__u64) * 8) - __builtin_clzll(w);
+ return fls64((unsigned int) x);
}
static inline unsigned int __arch_hweight32(unsigned int w)
@@ -118,6 +81,9 @@ static inline unsigned long __arch_hweight64(__u64 w)
return __builtin_popcountll(w);
}
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-ffs.h>
#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/find.h>
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index 571b118bfd9..bbf7b666f21 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -16,8 +16,7 @@
#define _ASM_TILE_BITOPS_32_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
-#include <asm/system.h>
+#include <asm/barrier.h>
/* Tile-specific routines to support <asm/bitops.h>. */
unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
@@ -50,8 +49,8 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
* restricted to acting on a single-word quantity.
*
* clear_bit() may not contain a memory barrier, so if it is used for
- * locking purposes, you should call smp_mb__before_clear_bit() and/or
- * smp_mb__after_clear_bit() to ensure changes are visible on other cpus.
+ * locking purposes, you should call smp_mb__before_atomic() and/or
+ * smp_mb__after_atomic() to ensure changes are visible on other cpus.
*/
static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
@@ -122,10 +121,6 @@ static inline int test_and_change_bit(unsigned nr,
return (_atomic_xor(addr, mask) & mask) != 0;
}
-/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() do {} while (0)
-
#include <asm-generic/bitops/ext2-atomic.h>
#endif /* _ASM_TILE_BITOPS_32_H */
diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
index e9c8e381ee0..bb1a29221fc 100644
--- a/arch/tile/include/asm/bitops_64.h
+++ b/arch/tile/include/asm/bitops_64.h
@@ -16,8 +16,7 @@
#define _ASM_TILE_BITOPS_64_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
-#include <asm/system.h>
+#include <asm/cmpxchg.h>
/* See <asm/bitops.h> for API comments. */
@@ -33,20 +32,15 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask);
}
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() smp_mb()
-
-
static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
- unsigned long old, mask = (1UL << (nr % BITS_PER_LONG));
- long guess, oldval;
+ unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+ unsigned long guess, oldval;
addr += nr / BITS_PER_LONG;
- old = *addr;
+ oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
}
@@ -86,13 +80,12 @@ static inline int test_and_change_bit(unsigned nr,
volatile unsigned long *addr)
{
unsigned long mask = (1UL << (nr % BITS_PER_LONG));
- long guess, oldval = *addr;
+ unsigned long guess, oldval;
addr += nr / BITS_PER_LONG;
oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
return (oldval & mask) != 0;
}
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
deleted file mode 100644
index 9558416d578..00000000000
--- a/arch/tile/include/asm/byteorder.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/byteorder/little_endian.h>
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 392e5333dd8..6160761d5f6 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,17 @@
#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
/*
- * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ * TILEPro I/O is not always coherent (networking typically uses coherent
+ * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the
+ * L2 cacheline size helps ensure that kernel heap allocations are aligned.
+ * TILE-Gx I/O is always coherent when used on hash-for-home pages.
+ *
+ * However, it's possible at runtime to request not to use hash-for-home
+ * for the kernel heap, in which case the kernel will use flush-and-inval
+ * to manage coherence. As a result, we use L2_CACHE_BYTES for the
+ * DMA minimum alignment to avoid false sharing in the kernel heap.
*/
-#ifndef __tilegx__
#define ARCH_DMA_MINALIGN L2_CACHE_BYTES
-#endif
/* use the cache line size for the L2, which is where it counts */
#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
@@ -43,9 +49,16 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
/*
- * Attribute for data that is kept read/write coherent until the end of
- * initialization, then bumped to read/only incoherent for performance.
+ * Originally we used small TLB pages for kernel data and grouped some
+ * things together as "write once", enforcing the property at the end
+ * of initialization by making those pages read-only and non-coherent.
+ * This allowed better cache utilization since cache inclusion did not
+ * need to be maintained. However, to do this requires an extra TLB
+ * entry, which on balance is more of a performance hit than the
+ * non-coherence is a performance gain, so we now just make "read
+ * mostly" and "write once" be synonyms. We keep the attribute
+ * separate in case we change our minds at a future date.
*/
-#define __write_once __attribute__((__section__(".w1data")))
+#define __write_once __read_mostly
#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index e925f4bb498..92ee4c8a4f7 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -20,7 +20,6 @@
/* Keep includes the same across arches. */
#include <linux/mm.h>
#include <linux/cache.h>
-#include <asm/system.h>
#include <arch/icache.h>
/* Caches are physically-indexed and so don't need special treatment */
@@ -76,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma,
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy((dst), (src), (len))
-/*
- * Invalidate a VA range; pads to L2 cacheline boundaries.
- *
- * Note that on TILE64, __inv_buffer() actually flushes modified
- * cache lines in addition to invalidating them, i.e., it's the
- * same as __finv_buffer().
- */
-static inline void __inv_buffer(void *buffer, size_t size)
-{
- char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
- char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
- while (next < finish) {
- __insn_inv(next);
- next += CHIP_INV_STRIDE();
- }
-}
-
/* Flush a VA range; pads to L2 cacheline boundaries. */
static inline void __flush_buffer(void *buffer, size_t size)
{
@@ -116,13 +98,6 @@ static inline void __finv_buffer(void *buffer, size_t size)
}
-/* Invalidate a VA range and wait for it to be complete. */
-static inline void inv_buffer(void *buffer, size_t size)
-{
- __inv_buffer(buffer, size);
- mb();
-}
-
/*
* Flush a locally-homecached VA range and wait for the evicted
* cachelines to hit memory.
@@ -143,6 +118,26 @@ static inline void finv_buffer_local(void *buffer, size_t size)
mb_incoherent();
}
+#ifdef __tilepro__
+/* Invalidate a VA range; pads to L2 cacheline boundaries. */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+ char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+ char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+ while (next < finish) {
+ __insn_inv(next);
+ next += CHIP_INV_STRIDE();
+ }
+}
+
+/* Invalidate a VA range and wait for it to be complete. */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+ __inv_buffer(buffer, size);
+ mb();
+}
+#endif
+
/*
* Flush and invalidate a VA range that is homed remotely, waiting
* until the memory controller holds the flushed values. If "hfh" is
@@ -152,4 +147,14 @@ static inline void finv_buffer_local(void *buffer, size_t size)
*/
void finv_buffer_remote(void *buffer, size_t size, int hfh);
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible:
+ *
+ * TODO: fill this in!
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
index a120766c726..b21a2fdec9f 100644
--- a/arch/tile/include/asm/checksum.h
+++ b/arch/tile/include/asm/checksum.h
@@ -21,4 +21,22 @@
__wsum do_csum(const unsigned char *buff, int len);
#define do_csum do_csum
+/*
+ * Return the sum of all the 16-bit subwords in a long.
+ * This sums two subwords on a 32-bit machine, and four on 64 bits.
+ * The implementation does two vector adds to capture any overflow.
+ */
+static inline unsigned int csum_long(unsigned long x)
+{
+ unsigned long ret;
+#ifdef __tilegx__
+ ret = __insn_v2sadu(x, 0);
+ ret = __insn_v2sadu(ret, 0);
+#else
+ ret = __insn_sadh_u(x, 0);
+ ret = __insn_sadh_u(ret, 0);
+#endif
+ return ret;
+}
+
#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 00000000000..0ccda3c425b
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,134 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+
+/* Nonexistent functions intended to cause compile errors. */
+extern void __xchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for cmpxchg");
+
+#ifndef __tilegx__
+
+/* Note the _atomic_xxx() routines include a final mb(). */
+int _atomic_xchg(int *ptr, int n);
+int _atomic_xchg_add(int *v, int i);
+int _atomic_xchg_add_unless(int *v, int a, int u);
+int _atomic_cmpxchg(int *ptr, int o, int n);
+long long _atomic64_xchg(long long *v, long long n);
+long long _atomic64_xchg_add(long long *v, long long i);
+long long _atomic64_xchg_add_unless(long long *v, long long a, long long u);
+long long _atomic64_cmpxchg(long long *v, long long o, long long n);
+
+#define xchg(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \
+ })
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \
+ (int)n); \
+ })
+
+#define xchg64(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \
+ (long long)(n)); \
+ })
+
+#define cmpxchg64(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \
+ (long long)o, (long long)n); \
+ })
+
+#else
+
+#define xchg(ptr, n) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ smp_mb(); \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_exch4((ptr), \
+ (u32)(unsigned long)(n)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x)) \
+ __insn_exch((ptr), (unsigned long)(n)); \
+ break; \
+ default: \
+ __xchg_called_with_bad_pointer(); \
+ break; \
+ } \
+ smp_mb(); \
+ __x; \
+ })
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \
+ smp_mb(); \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_cmpexch4((ptr), \
+ (u32)(unsigned long)(n)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x))__insn_cmpexch((ptr), \
+ (long long)(n)); \
+ break; \
+ default: \
+ __cmpxchg_called_with_bad_pointer(); \
+ break; \
+ } \
+ smp_mb(); \
+ __x; \
+ })
+
+#define xchg64 xchg
+#define cmpxchg64 cmpxchg
+
+#endif
+
+#define tas(ptr) xchg((ptr), 1)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index bf95f55b82b..ffd4493efc7 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t;
typedef __kernel_mode_t compat_mode_t;
typedef __kernel_dev_t compat_dev_t;
typedef __kernel_loff_t compat_loff_t;
-typedef __kernel_nlink_t compat_nlink_t;
typedef __kernel_ipc_pid_t compat_ipc_pid_t;
typedef __kernel_daddr_t compat_daddr_t;
typedef __kernel_fsid_t compat_fsid_t;
@@ -111,6 +110,68 @@ struct compat_flock64 {
typedef u32 compat_sigset_word;
+typedef union compat_sigval {
+ compat_int_t sival_int;
+ compat_uptr_t sival_ptr;
+} compat_sigval_t;
+
+#define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[COMPAT_SI_PAD_SIZE];
+
+ /* kill() */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ int _overrun_incr; /* amount to add to overrun */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ unsigned int _pid; /* which child */
+ unsigned int _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ unsigned int _addr; /* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+ int _trapno; /* TRAP # which caused the signal */
+#endif
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
#define COMPAT_OFF_T_MAX 0x7fffffff
#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
@@ -211,57 +272,26 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
struct pt_regs *regs);
/* Compat syscalls. */
-struct compat_sigaction;
struct compat_siginfo;
struct compat_sigaltstack;
-long compat_sys_execve(const char __user *path,
- compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *);
-long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
- struct compat_sigaction __user *oact,
- size_t sigsetsize);
-long compat_sys_rt_sigqueueinfo(int pid, int sig,
- struct compat_siginfo __user *uinfo);
-long compat_sys_rt_sigreturn(struct pt_regs *);
-long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
- struct compat_sigaltstack __user *uoss_ptr,
- struct pt_regs *);
+long compat_sys_rt_sigreturn(void);
long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
u32 dummy, u32 low, u32 high);
long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
u32 dummy, u32 low, u32 high);
-long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len);
long compat_sys_sync_file_range2(int fd, unsigned int flags,
u32 offset_lo, u32 offset_hi,
u32 nbytes_lo, u32 nbytes_hi);
long compat_sys_fallocate(int fd, int mode,
u32 offset_lo, u32 offset_hi,
u32 len_lo, u32 len_hi);
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval);
-
-/* Versions of compat functions that differ from generic Linux. */
-struct compat_msgbuf;
-long tile_compat_sys_msgsnd(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, int msgflg);
-long tile_compat_sys_msgrcv(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, long msgtyp, int msgflg);
-long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
- compat_long_t addr, compat_long_t data);
-
-/* Tilera Linux syscalls that don't have "compat" versions. */
-#define compat_sys_flush_cache sys_flush_cache
-
-/* These are the intvec_64.S trampolines. */
-long _compat_sys_execve(const char __user *path,
- const compat_uptr_t __user *argv,
- const compat_uptr_t __user *envp);
-long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
- struct compat_sigaltstack __user *uoss_ptr);
+long compat_sys_llseek(unsigned int fd, unsigned int offset_high,
+ unsigned int offset_low, loff_t __user * result,
+ unsigned int origin);
+
+/* Assembly trampoline to avoid clobbering r0. */
long _compat_sys_rt_sigreturn(void);
#endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
new file mode 100644
index 00000000000..6ab8bf146d4
--- /dev/null
+++ b/arch/tile/include/asm/device.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ * Arch specific extensions to struct device
+ */
+
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
+
+struct dev_archdata {
+ /* DMA operations on that device */
+ struct dma_map_ops *dma_ops;
+
+ /* Offset of the DMA address from the PA. */
+ dma_addr_t dma_offset;
+
+ /*
+ * Highest DMA address that can be generated by devices that
+ * have limited DMA capability, i.e. non 64-bit capable.
+ */
+ dma_addr_t max_direct_dma_addr;
+};
+
+struct pdev_archdata {
+};
+
+#endif /* _ASM_TILE_DEVICE_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b3..1eae359d831 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,94 @@
#include <linux/cache.h>
#include <linux/io.h>
-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations. For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+#ifdef __tilegx__
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+#endif
+
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ else
+ return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+ return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+ dev->archdata.dma_offset = off;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr;
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
- size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
- size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
- enum dma_data_direction);
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size - 1 <= *dev->dma_mask;
+}
static inline int
dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- return 0;
+ debug_dma_mapping_error(dev, dma_addr);
+ return get_dma_ops(dev)->mapping_error(dev, dma_addr);
}
static inline int
dma_supported(struct device *dev, u64 mask)
{
- return 1;
+ return get_dma_ops(dev)->dma_supported(dev, mask);
}
static inline int
dma_set_mask(struct device *dev, u64 mask)
{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /*
+ * For PCI devices with 64-bit DMA addressing capability, promote
+ * the dma_ops to hybrid, with the consistent memory DMA space limited
+ * to 32-bit. For 32-bit capable devices, limit the streaming DMA
+ * address range to max_direct_dma_addr.
+ */
+ if (dma_ops == gx_pci_dma_map_ops ||
+ dma_ops == gx_hybrid_pci_dma_map_ops ||
+ dma_ops == gx_legacy_pci_dma_map_ops) {
+ if (mask == DMA_BIT_MASK(64) &&
+ dma_ops == gx_legacy_pci_dma_map_ops)
+ set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
+ else if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
@@ -91,4 +116,43 @@ dma_set_mask(struct device *dev, u64 mask)
return 0;
}
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+ return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+ dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+
#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 623a6bb741c..41d9878a968 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-#define EM_TILE64 187
#define EM_TILEPRO 188
#define EM_TILEGX 191
@@ -44,7 +43,11 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
#else
#define ELF_CLASS ELFCLASS32
#endif
+#ifdef __BIG_ENDIAN__
+#define ELF_DATA ELFDATA2MSB
+#else
#define ELF_DATA ELFDATA2LSB
+#endif
/*
* There seems to be a bug in how compat_binfmt_elf.c works: it
@@ -59,6 +62,7 @@ enum { ELF_ARCH = CHIP_ELF_TYPE() };
*/
#define elf_check_arch(x) \
((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+ (x)->e_ident[EI_DATA] == ELF_DATA && \
(x)->e_machine == CHIP_ELF_TYPE())
/* The module loader only handles a few relocation types. */
@@ -127,6 +131,15 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
+#define ARCH_DLINFO \
+do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
+} while (0)
+
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_PLATFORM "tilegx-m32"
@@ -143,6 +156,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define compat_start_thread(regs, ip, usp) do { \
regs->pc = ptr_to_compat_reg((void *)(ip)); \
regs->sp = ptr_to_compat_reg((void *)(usp)); \
+ single_step_execve(); \
} while (0)
/*
@@ -151,12 +165,12 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#undef SET_PERSONALITY
#define SET_PERSONALITY(ex) \
do { \
- current->personality = PER_LINUX; \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \
current_thread_info()->status &= ~TS_COMPAT; \
} while (0)
#define COMPAT_SET_PERSONALITY(ex) \
do { \
- current->personality = PER_LINUX_32BIT; \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \
current_thread_info()->status |= TS_COMPAT; \
} while (0)
@@ -164,4 +178,6 @@ do { \
#endif /* CONFIG_COMPAT */
+#define CORE_DUMP_USE_REGSET
+
#endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index c66f7933bea..ffe2637aeb3 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -25,9 +25,6 @@
#include <asm/kmap_types.h>
#endif
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
@@ -45,15 +42,23 @@
*
* TLB entries of such buffers will not be flushed across
* task switches.
- *
- * We don't bother with a FIX_HOLE since above the fixmaps
- * is unmapped memory in any case.
*/
enum fixed_addresses {
+#ifdef __tilegx__
+ /*
+ * TILEPro has unmapped memory above so the hole isn't needed,
+ * and in any case the hole pushes us over a single 16MB pmd.
+ */
+ FIX_HOLE,
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef __tilegx__ /* see homecache.c */
+ FIX_HOMECACHE_BEGIN,
+ FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1,
+#endif
__end_of_permanent_fixed_addresses,
/*
@@ -70,48 +75,12 @@ enum fixed_addresses {
#endif
};
-extern void __set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
-#define clear_fixmap(idx) \
- __set_fixmap(idx, 0, __pgprot(0))
-
#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
#define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
- return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
- BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
- return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 461459b06d9..13a9bb81a8a 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -15,6 +15,26 @@
#ifndef _ASM_TILE_FTRACE_H
#define _ASM_TILE_FTRACE_H
-/* empty */
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(__mcount))
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void __mcount(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index d03ec124a59..1a6ef1b69cb 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -28,29 +28,82 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
+#include <asm/atomic.h>
-extern struct __get_user futex_set(u32 __user *v, int i);
-extern struct __get_user futex_add(u32 __user *v, int n);
-extern struct __get_user futex_or(u32 __user *v, int n);
-extern struct __get_user futex_andn(u32 __user *v, int n);
-extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
+/*
+ * Support macros for futex operations. Do not use these macros directly.
+ * They assume "ret", "val", "oparg", and "uaddr" in the lexical context.
+ * __futex_cmpxchg() additionally assumes "oldval".
+ */
+
+#ifdef __tilegx__
+
+#define __futex_asm(OP) \
+ asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %5; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 8\n" \
+ ".quad 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (val), "+m" (*(uaddr)) \
+ : "r" (uaddr), "r" (oparg), "i" (-EFAULT))
+
+#define __futex_set() __futex_asm(exch4)
+#define __futex_add() __futex_asm(fetchadd4)
+#define __futex_or() __futex_asm(fetchor4)
+#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); })
+#define __futex_cmpxchg() \
+ ({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); })
+
+#define __futex_xor() \
+ ({ \
+ u32 oldval, n = oparg; \
+ if ((ret = __get_user(oldval, uaddr)) == 0) { \
+ do { \
+ oparg = oldval ^ n; \
+ __futex_cmpxchg(); \
+ } while (ret == 0 && oldval != val); \
+ } \
+ })
+
+/* No need to prefetch, since the atomic ops go to the home cache anyway. */
+#define __futex_prolog()
-#ifndef __tilegx__
-extern struct __get_user futex_xor(u32 __user *v, int n);
#else
-static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
-{
- struct __get_user asm_ret = __get_user_4(uaddr);
- if (!asm_ret.err) {
- int oldval, newval;
- do {
- oldval = asm_ret.val;
- newval = oldval ^ n;
- asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- } while (asm_ret.err == 0 && oldval != asm_ret.val);
+
+#define __futex_call(FN) \
+ { \
+ struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \
+ val = gu.val; \
+ ret = gu.err; \
}
- return asm_ret;
-}
+
+#define __futex_set() __futex_call(__atomic_xchg)
+#define __futex_add() __futex_call(__atomic_xchg_add)
+#define __futex_or() __futex_call(__atomic_or)
+#define __futex_andn() __futex_call(__atomic_andn)
+#define __futex_xor() __futex_call(__atomic_xor)
+
+#define __futex_cmpxchg() \
+ { \
+ struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
+ lock, oldval, oparg); \
+ val = gu.val; \
+ ret = gu.err; \
+ }
+
+/*
+ * Find the lock pointer for the atomic calls to use, and issue a
+ * prefetch to the user address to bring it into cache. Similar to
+ * __atomic_setup(), but we can't do a read into the L1 since it might
+ * fault; instead we do a prefetch into the L2.
+ */
+#define __futex_prolog() \
+ int *lock; \
+ __insn_prefetch(uaddr); \
+ lock = __atomic_hashed_lock((int __force *)uaddr)
#endif
static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
@@ -59,8 +112,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
- int ret;
- struct __get_user asm_ret;
+ int uninitialized_var(val), ret;
+
+ __futex_prolog();
+
+ /* The 32-bit futex code makes this assumption, so validate it here. */
+ BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
@@ -71,46 +128,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
- asm_ret = futex_set(uaddr, oparg);
+ __futex_set();
break;
case FUTEX_OP_ADD:
- asm_ret = futex_add(uaddr, oparg);
+ __futex_add();
break;
case FUTEX_OP_OR:
- asm_ret = futex_or(uaddr, oparg);
+ __futex_or();
break;
case FUTEX_OP_ANDN:
- asm_ret = futex_andn(uaddr, oparg);
+ __futex_andn();
break;
case FUTEX_OP_XOR:
- asm_ret = futex_xor(uaddr, oparg);
+ __futex_xor();
break;
default:
- asm_ret.err = -ENOSYS;
+ ret = -ENOSYS;
+ break;
}
pagefault_enable();
- ret = asm_ret.err;
-
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
- ret = (asm_ret.val == cmparg);
+ ret = (val == cmparg);
break;
case FUTEX_OP_CMP_NE:
- ret = (asm_ret.val != cmparg);
+ ret = (val != cmparg);
break;
case FUTEX_OP_CMP_LT:
- ret = (asm_ret.val < cmparg);
+ ret = (val < cmparg);
break;
case FUTEX_OP_CMP_GE:
- ret = (asm_ret.val >= cmparg);
+ ret = (val >= cmparg);
break;
case FUTEX_OP_CMP_LE:
- ret = (asm_ret.val <= cmparg);
+ ret = (val <= cmparg);
break;
case FUTEX_OP_CMP_GT:
- ret = (asm_ret.val > cmparg);
+ ret = (val > cmparg);
break;
default:
ret = -ENOSYS;
@@ -120,22 +176,20 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+ u32 oldval, u32 oparg)
{
- struct __get_user asm_ret;
+ int ret, val;
+
+ __futex_prolog();
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- *uval = asm_ret.val;
- return asm_ret.err;
-}
+ __futex_cmpxchg();
-#ifndef __tilegx__
-/* Return failure from the atomic wrappers. */
-struct __get_user __atomic_bad_address(int __user *addr);
-#endif
+ *uval = val;
+ return ret;
+}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
index 822390f9a15..54110af2398 100644
--- a/arch/tile/include/asm/hardirq.h
+++ b/arch/tile/include/asm/hardirq.h
@@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-#define HARDIRQ_BITS 8
-
#endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 2ac422848c7..2f572b6b7bc 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h
@@ -11,45 +11,13 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
- * Provide methods for the HARDWALL_FILE for accessing the UDN.
+ * Provide methods for access control of per-cpu resources like
+ * UDN, IDN, or IPI.
*/
-
#ifndef _ASM_TILE_HARDWALL_H
#define _ASM_TILE_HARDWALL_H
-#include <linux/ioctl.h>
-
-#define HARDWALL_IOCTL_BASE 0xa2
-
-/*
- * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
- * The resulting ioctl value is passed to the kernel in conjunction
- * with a pointer to a little-endian bitmask of cpus, which must be
- * physically in a rectangular configuration on the chip.
- * The "size" is the number of bytes of cpu mask data.
- */
-#define _HARDWALL_CREATE 1
-#define HARDWALL_CREATE(size) \
- _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size))
-
-#define _HARDWALL_ACTIVATE 2
-#define HARDWALL_ACTIVATE \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE)
-
-#define _HARDWALL_DEACTIVATE 3
-#define HARDWALL_DEACTIVATE \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
-
-#define _HARDWALL_GET_ID 4
-#define HARDWALL_GET_ID \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
-
-#ifndef __KERNEL__
-
-/* This is the canonical name expected by userspace. */
-#define HARDWALL_FILE "/dev/hardwall"
-
-#else
+#include <uapi/asm/hardwall.h>
/* /proc hooks for hardwall. */
struct proc_dir_entry;
@@ -59,7 +27,4 @@ int proc_pid_hardwall(struct task_struct *task, char *buffer);
#else
static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
#endif
-
-#endif
-
#endif /* _ASM_TILE_HARDWALL_H */
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index b2a6c5de79a..fc8429a31c8 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -59,7 +59,7 @@ void *kmap_fix_kpte(struct page *page, int finished);
/* This macro is used only in map_new_virtual() to map "page". */
#define kmap_prot page_to_kpgprot(page)
-void *__kmap_atomic(struct page *page);
+void *kmap_atomic(struct page *page);
void __kunmap_atomic(void *kvaddr);
void *kmap_atomic_pfn(unsigned long pfn);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index a8243865d49..7ddd1b8d691 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
/*
* Is this page immutable (unwritable) and thus able to be cached more
- * widely than would otherwise be possible? On tile64 this means we
- * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ * widely than would otherwise be possible? This means we have "nc" set.
*/
#define PAGE_HOME_IMMUTABLE -2
@@ -44,16 +43,8 @@ struct zone;
*/
#define PAGE_HOME_INCOHERENT -3
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Home for the page is distributed via hash-for-home. */
#define PAGE_HOME_HASH -4
-#endif
-
-/* Homing is unknown or unspecified. Not valid for page_home(). */
-#define PAGE_HOME_UNKNOWN -5
-
-/* Home on the current cpu. Not valid for page_home(). */
-#define PAGE_HOME_HERE -6
/* Support wrapper to use instead of explicit hv_flush_remote(). */
extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
@@ -79,10 +70,17 @@ extern void homecache_change_page_home(struct page *, int order, int home);
/*
* Flush a page out of whatever cache(s) it is in.
* This is more than just finv, since it properly handles waiting
- * for the data to reach memory on tilepro, but it can be quite
- * heavyweight, particularly on hash-for-home memory.
+ * for the data to reach memory, but it can be quite
+ * heavyweight, particularly on incoherent or immutable memory.
+ */
+extern void homecache_finv_page(struct page *);
+
+/*
+ * Flush a page out of the specified home cache.
+ * Note that the specified home need not be the actual home of the page,
+ * as for example might be the case when coordinating with I/O devices.
*/
-extern void homecache_flush_cache(struct page *, int order);
+extern void homecache_finv_map_page(struct page *, int home);
/*
* Allocate a page with the given GFP flags, home, and optionally
@@ -104,10 +102,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
* routines use homecache_change_page_home() to reset the home
* back to the default before returning the page to the allocator.
*/
+void __homecache_free_pages(struct page *, unsigned int order);
void homecache_free_pages(unsigned long addr, unsigned int order);
-#define homecache_free_page(page) \
- homecache_free_pages((page), 0)
-
+#define __homecache_free_page(page) __homecache_free_pages((page), 0)
+#define homecache_free_page(page) homecache_free_pages((page), 0)
/*
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d180516..3257733003f 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -16,6 +16,7 @@
#define _ASM_TILE_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
@@ -106,4 +107,29 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writable)
+{
+ size_t pagesize = huge_page_size(hstate_vma(vma));
+ if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
+ entry = pte_mksuper(entry);
+ return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */
+enum {
+ HUGE_SHIFT_PGDIR = 0,
+ HUGE_SHIFT_PMD = 1,
+ HUGE_SHIFT_PAGE = 2,
+ HUGE_SHIFT_ENTRIES
+};
+extern int huge_shift[HUGE_SHIFT_ENTRIES];
+#endif
+
#endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index d2152deb1f3..9fe434969fa 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -19,7 +19,8 @@
#include <linux/bug.h>
#include <asm/page.h>
-#define IO_SPACE_LIMIT 0xfffffffful
+/* Maximum PCI I/O space address supported. */
+#define IO_SPACE_LIMIT 0xffffffff
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -62,6 +63,92 @@ extern void iounmap(volatile void __iomem *addr);
#define mm_ptov(addr) ((void *)phys_to_virt(addr))
#define mm_vtop(addr) ((unsigned long)virt_to_phys(addr))
+#if CHIP_HAS_MMIO()
+
+/*
+ * We use inline assembly to guarantee that the compiler does not
+ * split an access into multiple byte-sized accesses as it might
+ * sometimes do if a register data structure is marked "packed".
+ * Obviously on tile we can't tolerate such an access being
+ * actually unaligned, but we want to avoid the case where the
+ * compiler conservatively would generate multiple accesses even
+ * for an aligned read or write.
+ */
+
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ u16 ret;
+ asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le16_to_cpu(ret);
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ u32 ret;
+ /* Sign-extend to conform to u32 ABI sign-extension convention. */
+ asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le32_to_cpu(ret);
+}
+
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ u64 ret;
+ asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le64_to_cpu(ret);
+}
+
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = val;
+}
+
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+ asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val)));
+}
+
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+ asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val)));
+}
+
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+ asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val)));
+}
+
+/*
+ * The on-chip I/O hardware on tilegx is configured with VA=PA for the
+ * kernel's PA range. The low-level APIs and field names use "va" and
+ * "void *" nomenclature, to be consistent with the general notion
+ * that the addresses in question are virtualizable, but in the kernel
+ * context we are actually manipulating PA values. (In other contexts,
+ * e.g. access from user space, we do in fact use real virtual addresses
+ * in the va fields.) To allow readers of the code to understand what's
+ * happening, we direct their attention to this comment by using the
+ * following two functions that just duplicate __va() and __pa().
+ */
+typedef unsigned long tile_io_addr_t;
+static inline tile_io_addr_t va_to_tile_io_addr(void *va)
+{
+ BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t));
+ return __pa(va);
+}
+static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr)
+{
+ return __va(tile_io_addr);
+}
+
+#else /* CHIP_HAS_MMIO() */
+
#ifdef CONFIG_PCI
extern u8 _tile_readb(unsigned long addr);
@@ -73,10 +160,19 @@ extern void _tile_writew(u16 val, unsigned long addr);
extern void _tile_writel(u32 val, unsigned long addr);
extern void _tile_writeq(u64 val, unsigned long addr);
-#else
+#define __raw_readb(addr) _tile_readb((unsigned long)addr)
+#define __raw_readw(addr) _tile_readw((unsigned long)addr)
+#define __raw_readl(addr) _tile_readl((unsigned long)addr)
+#define __raw_readq(addr) _tile_readq((unsigned long)addr)
+#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
+#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr)
+#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr)
+#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
+
+#else /* CONFIG_PCI */
/*
- * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * The tilepro architecture does not support IOMEM unless PCI is enabled.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -88,65 +184,58 @@ static inline int iomem_panic(void)
return 0;
}
-static inline u8 _tile_readb(unsigned long addr)
+static inline u8 readb(unsigned long addr)
{
return iomem_panic();
}
-static inline u16 _tile_readw(unsigned long addr)
+static inline u16 _readw(unsigned long addr)
{
return iomem_panic();
}
-static inline u32 _tile_readl(unsigned long addr)
+static inline u32 readl(unsigned long addr)
{
return iomem_panic();
}
-static inline u64 _tile_readq(unsigned long addr)
+static inline u64 readq(unsigned long addr)
{
return iomem_panic();
}
-static inline void _tile_writeb(u8 val, unsigned long addr)
+static inline void writeb(u8 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writew(u16 val, unsigned long addr)
+static inline void writew(u16 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writel(u32 val, unsigned long addr)
+static inline void writel(u32 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writeq(u64 val, unsigned long addr)
+static inline void writeq(u64 val, unsigned long addr)
{
iomem_panic();
}
-#endif
+#endif /* CONFIG_PCI */
-#define readb(addr) _tile_readb((unsigned long)addr)
-#define readw(addr) _tile_readw((unsigned long)addr)
-#define readl(addr) _tile_readl((unsigned long)addr)
-#define readq(addr) _tile_readq((unsigned long)addr)
-#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
-#define writew(val, addr) _tile_writew(val, (unsigned long)addr)
-#define writel(val, addr) _tile_writel(val, (unsigned long)addr)
-#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_readq readq
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-#define __raw_writeq writeq
+#endif /* CHIP_HAS_MMIO() */
+
+#define readb __raw_readb
+#define readw __raw_readw
+#define readl __raw_readl
+#define readq __raw_readq
+#define writeb __raw_writeb
+#define writew __raw_writew
+#define writel __raw_writel
+#define writeq __raw_writeq
#define readb_relaxed readb
#define readw_relaxed readw
@@ -162,9 +251,11 @@ static inline void _tile_writeq(u64 val, unsigned long addr)
#define iowrite32 writel
#define iowrite64 writeq
-static inline void memset_io(void *dst, int val, size_t len)
+#if CHIP_HAS_MMIO() || defined(CONFIG_PCI)
+
+static inline void memset_io(volatile void *dst, int val, size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
val = (val & 0xff) * 0x01010101;
for (x = 0; x < len; x += 4)
@@ -174,7 +265,7 @@ static inline void memset_io(void *dst, int val, size_t len)
static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)src & 0x3);
for (x = 0; x < len; x += 4)
*(u32 *)(dst + x) = readl(src + x);
@@ -183,14 +274,116 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
for (x = 0; x < len; x += 4)
writel(*(u32 *)(src + x), dst + x);
}
+#endif
+
+#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO)
+
+static inline u8 inb(unsigned long addr)
+{
+ return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+ return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+ return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+ writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+ writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+ writel(b, (volatile void __iomem *) addr);
+}
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u8 *buf = buffer;
+ do {
+ u8 x = inb(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u16 *buf = buffer;
+ do {
+ u16 x = inw(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u32 *buf = buffer;
+ do {
+ u32 x = inl(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u8 *buf = buffer;
+ do {
+ outb(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u16 *buf = buffer;
+ do {
+ outw(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u32 *buf = buffer;
+ do {
+ outl(*buf++, addr);
+ } while (--count);
+ }
+}
+
+extern void __iomem *ioport_map(unsigned long port, unsigned int len);
+extern void ioport_unmap(void __iomem *addr);
+
+#else
+
/*
- * The Tile architecture does not support IOPORT, even with PCI.
+ * The TilePro architecture does not support IOPORT, even with PCI.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -198,7 +391,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
static inline long ioport_panic(void)
{
+#ifdef __tilegx__
+ panic("PCI IO space support is disabled. Configure the kernel with"
+ " CONFIG_TILE_PCI_IO to enable it");
+#else
panic("inb/outb and friends do not exist on tile");
+#endif
return 0;
}
@@ -243,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr)
ioport_panic();
}
-#define inb_p(addr) inb(addr)
-#define inw_p(addr) inw(addr)
-#define inl_p(addr) inl(addr)
-#define outb_p(x, addr) outb((x), (addr))
-#define outw_p(x, addr) outw((x), (addr))
-#define outl_p(x, addr) outl((x), (addr))
-
static inline void insb(unsigned long addr, void *buffer, int count)
{
ioport_panic();
@@ -280,6 +471,15 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
ioport_panic();
}
+#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */
+
+#define inb_p(addr) inb(addr)
+#define inw_p(addr) inw(addr)
+#define inl_p(addr) inl(addr)
+#define outb_p(x, addr) outb((x), (addr))
+#define outw_p(x, addr) outw((x), (addr))
+#define outl_p(x, addr) outl((x), (addr))
+
#define ioread16be(addr) be16_to_cpu(ioread16(addr))
#define ioread32be(addr) be32_to_cpu(ioread32(addr))
#define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr))
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index f80f8ceabc6..1fe86911838 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -18,10 +18,12 @@
#include <linux/hardirq.h>
/* The hypervisor interface provides 32 IRQs. */
-#define NR_IRQS 32
+#define NR_IRQS 32
/* IRQ numbers used for linux IPIs. */
-#define IRQ_RESCHEDULE 1
+#define IRQ_RESCHEDULE 0
+/* Interrupts for dynamic allocation start at 1. Let the core allocate irq0 */
+#define NR_IRQS_LEGACY 1
#define irq_canonicalize(irq) (irq)
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 5db0ce54284..71af5747874 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,32 +18,20 @@
#include <arch/interrupts.h>
#include <arch/chip.h>
-#if !defined(__tilegx__) && defined(__ASSEMBLY__)
-
/*
* The set of interrupts we want to allow when interrupts are nominally
* disabled. The remainder are effectively "NMI" interrupts from
* the point of view of the generic Linux code. Note that synchronous
* interrupts (aka "non-queued") are not blocked by the mask in any case.
*/
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS_HI \
- (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
-#else
-#define LINUX_MASKABLE_INTERRUPTS_HI \
- (~(INT_MASK_HI(INT_PERF_COUNT)))
-#endif
-
-#else
-
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS \
- (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
-#else
#define LINUX_MASKABLE_INTERRUPTS \
- (~(INT_MASK(INT_PERF_COUNT)))
-#endif
+ (~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT)))
+#if CHIP_HAS_SPLIT_INTR_MASK()
+/* The same macro, but for the two 32-bit SPRs separately. */
+#define LINUX_MASKABLE_INTERRUPTS_LO (-1)
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+ (~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32))))
#endif
#ifndef __ASSEMBLY__
@@ -52,7 +40,15 @@
#include <asm/percpu.h>
#include <arch/spr_def.h>
-/* Set and clear kernel interrupt masks. */
+/*
+ * Set and clear kernel interrupt masks.
+ *
+ * NOTE: __insn_mtspr() is a compiler builtin marked as a memory
+ * clobber. We rely on it being equivalent to a compiler barrier in
+ * this code since arch_local_irq_save() and friends must act as
+ * compiler barriers. This compiler semantic is baked into enough
+ * places that the compiler will maintain it going forward.
+ */
#if CHIP_HAS_SPLIT_INTR_MASK()
#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
# error Fix assumptions about which word various interrupts are in
@@ -90,6 +86,14 @@
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
} while (0)
+#define interrupt_mask_save_mask() \
+ (__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \
+ (((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32))
+#define interrupt_mask_restore_mask(mask) do { \
+ unsigned long long __m = (mask); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \
+} while (0)
#else
#define interrupt_mask_set(n) \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
@@ -101,6 +105,10 @@
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
#define interrupt_mask_reset_mask(mask) \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
+#define interrupt_mask_save_mask() \
+ __insn_mfspr(SPR_INTERRUPT_MASK_K)
+#define interrupt_mask_restore_mask(mask) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K, (mask))
#endif
/*
@@ -114,7 +122,13 @@
* to know our current state.
*/
DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
-#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
+
+#ifdef CONFIG_DEBUG_PREEMPT
+/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#endif
/* Disable interrupts. */
#define arch_local_irq_disable() \
@@ -122,11 +136,20 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Disable all interrupts, including NMIs. */
#define arch_local_irq_disable_all() \
- interrupt_mask_set_mask(-1UL)
+ interrupt_mask_set_mask(-1ULL)
+
+/*
+ * Read the set of maskable interrupts.
+ * We avoid the preemption warning here via __this_cpu_ptr since even
+ * if irqs are already enabled, it's harmless to read the wrong cpu's
+ * enabled mask.
+ */
+#define arch_local_irqs_enabled() \
+ (*__this_cpu_ptr(&interrupts_enabled_mask))
/* Re-enable all maskable interrupts. */
#define arch_local_irq_enable() \
- interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+ interrupt_mask_reset_mask(arch_local_irqs_enabled())
/* Disable or enable interrupts based on flag argument. */
#define arch_local_irq_restore(disabled) do { \
@@ -153,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Prevent the given interrupt from being enabled next time we enable irqs. */
#define arch_local_irq_mask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+ this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt)))
/* Prevent the given interrupt from being enabled immediately. */
#define arch_local_irq_mask_now(interrupt) do { \
@@ -163,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Allow the given interrupt to be enabled next time we enable irqs. */
#define arch_local_irq_unmask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+ this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt)))
/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
#define arch_local_irq_unmask_now(interrupt) do { \
@@ -179,7 +202,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
#ifdef __tilegx__
#if INT_MEM_ERROR != 0
-# error Fix IRQ_DISABLED() macro
+# error Fix IRQS_DISABLED() macro
#endif
/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
@@ -207,9 +230,10 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
mtspr SPR_INTERRUPT_MASK_SET_K, tmp
/* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
- ld tmp0, tmp0; \
+ ld tmp0, tmp0
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0
#else /* !__tilegx__ */
@@ -237,7 +261,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Disable interrupts. */
#define IRQ_DISABLE(tmp0, tmp1) \
{ \
- movei tmp0, -1; \
+ movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \
moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
}; \
{ \
@@ -253,17 +277,22 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp
/* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
{ \
lw tmp0, tmp0; \
addi tmp1, tmp0, 4 \
}; \
- lw tmp1, tmp1; \
+ lw tmp1, tmp1
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \
mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1
#endif
+#define IRQ_ENABLE(tmp0, tmp1) \
+ IRQ_ENABLE_LOAD(tmp0, tmp1); \
+ IRQ_ENABLE_APPLY(tmp0, tmp1)
+
/*
* Do the CPU's IRQ-state tracing from assembly code. We call a
* C function, but almost everywhere we do, we don't mind clobbering
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 00000000000..5bbbfa904c2
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
+
+#include <linux/notifier.h>
+
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_BREAK,
+ DIE_SSTEPBP,
+ DIE_PAGE_FAULT,
+ DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
index c11a6cc73bb..fc98ccfc98a 100644
--- a/arch/tile/include/asm/kexec.h
+++ b/arch/tile/include/asm/kexec.h
@@ -19,12 +19,24 @@
#include <asm/page.h>
+#ifndef __tilegx__
/* Maximum physical address we can use pages from. */
#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
/* Maximum address we can reach in physical address mode. */
#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
/* Maximum address we can use for the control code buffer. */
#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+#else
+/* We need to limit the memory below PGDIR_SIZE since
+ * we only setup page table for [0, PGDIR_SIZE) before final kexec.
+ */
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT PGDIR_SIZE
+#endif
#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h
new file mode 100644
index 00000000000..280c181cf0d
--- /dev/null
+++ b/arch/tile/include/asm/kgdb.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx KGDB support.
+ */
+
+#ifndef __TILE_KGDB_H__
+#define __TILE_KGDB_H__
+
+#include <linux/kdebug.h>
+#include <arch/opcode.h>
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+
+/*
+ * TILE-Gx gdb is expecting the following register layout:
+ * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO),
+ * plus the PC and the faultnum.
+ *
+ * Even though kernel not use the 8 special GPRs, they need to be present
+ * in the registers sent for correct processing in the host-side gdb.
+ *
+ */
+#define DBG_MAX_REG_NUM (56+8+2)
+#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG)
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets,
+ * Longer buffer is needed to list all threads.
+ */
+#define BUFMAX 2048
+
+#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES
+
+/*
+ * Require cache flush for set/clear a software breakpoint or write memory.
+ */
+#define CACHE_FLUSH_IS_SAFE 1
+
+/*
+ * The compiled-in breakpoint instruction can be used to "break" into
+ * the debugger via magic system request key (sysrq-G).
+ */
+static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT;
+
+enum tilegx_regnum {
+ TILEGX_PC_REGNUM = TREG_LAST_GPR + 9,
+ TILEGX_FAULTNUM_REGNUM,
+};
+
+/*
+ * Generate a breakpoint exception to "break" into the debugger.
+ */
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm volatile (".quad %0\n\t"
+ ::""(compiled_bpt));
+}
+
+#endif /* __TILE_KGDB_H__ */
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
index 3d0f2024626..92b28e3e997 100644
--- a/arch/tile/include/asm/kmap_types.h
+++ b/arch/tile/include/asm/kmap_types.h
@@ -23,35 +23,6 @@
* adds 4MB of required address-space. For now we leave KM_TYPE_NR
* set to depth 8.
*/
-enum km_type {
- KM_TYPE_NR = 8
-};
-
-/*
- * We provide dummy definitions of all the stray values that used to be
- * required for kmap_atomic() and no longer are.
- */
-enum {
- KM_BOUNCE_READ,
- KM_SKB_SUNRPC_DATA,
- KM_SKB_DATA_SOFTIRQ,
- KM_USER0,
- KM_USER1,
- KM_BIO_SRC_IRQ,
- KM_BIO_DST_IRQ,
- KM_PTE0,
- KM_PTE1,
- KM_IRQ0,
- KM_IRQ1,
- KM_SOFTIRQ0,
- KM_SOFTIRQ1,
- KM_SYNC_ICACHE,
- KM_SYNC_DCACHE,
- KM_UML_USERCOPY,
- KM_IRQ_PTE,
- KM_NMI,
- KM_NMI_PTE,
- KM_KDB
-};
+#define KM_TYPE_NR 8
#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 00000000000..d8f9a83943b
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+ flush_icache_range((unsigned long)p->addr, \
+ (unsigned long)p->addr + \
+ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+ kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+ unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+ (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+ - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR) \
+ ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \
+ ? MAX_JPROBES_STACK_ADDR - (ADDR) \
+ : MAX_JPROBES_STACK_SIZE)
+
+/* per-cpu kprobe control block. */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_saved_pc;
+ unsigned long jprobe_saved_sp;
+ struct prev_kprobe prev_kprobe;
+ struct pt_regs jprobe_saved_regs;
+ char jprobes_stack[MAX_JPROBES_STACK_SIZE];
+};
+
+extern tile_bundle_bits breakpoint2_insn;
+extern tile_bundle_bits breakpoint_insn;
+
+void arch_remove_kprobe(struct kprobe *);
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#endif /* _ASM_TILE_KPROBES_H */
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
deleted file mode 100644
index 359949be28c..00000000000
--- a/arch/tile/include/asm/memprof.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * The hypervisor's memory controller profiling infrastructure allows
- * the programmer to find out what fraction of the available memory
- * bandwidth is being consumed at each memory controller. The
- * profiler provides start, stop, and clear operations to allows
- * profiling over a specific time window, as well as an interface for
- * reading the most recent profile values.
- *
- * This header declares IOCTL codes necessary to control memprof.
- */
-#ifndef _ASM_TILE_MEMPROF_H
-#define _ASM_TILE_MEMPROF_H
-
-#include <linux/ioctl.h>
-
-#define MEMPROF_IOCTL_TYPE 0xB4
-#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
-#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
-#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
-
-#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
index 92f94c77b6e..0cab1182bde 100644
--- a/arch/tile/include/asm/mmu.h
+++ b/arch/tile/include/asm/mmu.h
@@ -21,7 +21,8 @@ struct mm_context {
* Written under the mmap_sem semaphore; read without the
* semaphore but atomically, but it is conservatively set.
*/
- unsigned int priority_cached;
+ unsigned long priority_cached;
+ unsigned long vdso_base;
};
typedef struct mm_context mm_context_t;
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
index 15fb2464112..4734215e2ad 100644
--- a/arch/tile/include/asm/mmu_context.h
+++ b/arch/tile/include/asm/mmu_context.h
@@ -30,18 +30,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return 0;
}
-/* Note that arch/tile/kernel/head.S also calls hv_install_context() */
+/*
+ * Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S
+ * also call hv_install_context().
+ */
static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
{
/* FIXME: DIRECTIO should not always be set. FIXME. */
- int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+ int rc = hv_install_context(__pa(pgdir), prot, asid,
+ HV_CTX_DIRECTIO | CTX_PAGE_FLAG);
if (rc < 0)
panic("hv_install_context failed: %d", rc);
}
static inline void install_page_table(pgd_t *pgdir, int asid)
{
- pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+ pte_t *ptep = virt_to_kpte((unsigned long)pgdir);
__install_page_table(pgdir, asid, *ptep);
}
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
index 9d3dbce8f95..804f1098b6c 100644
--- a/arch/tile/include/asm/mmzone.h
+++ b/arch/tile/include/asm/mmzone.h
@@ -42,7 +42,7 @@ static inline int pfn_to_nid(unsigned long pfn)
#define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr)
-static inline int pfn_valid(int pfn)
+static inline int pfn_valid(unsigned long pfn)
{
int nid = pfn_to_nid(pfn);
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
new file mode 100644
index 00000000000..44ed07ccd3d
--- /dev/null
+++ b/arch/tile/include/asm/module.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MODULE_H
+#define _ASM_TILE_MODULE_H
+
+#include <arch/chip.h>
+
+#include <asm-generic/module.h>
+
+/* We can't use modules built with different page sizes. */
+#if defined(CONFIG_PAGE_SIZE_16KB)
+# define MODULE_PGSZ " 16KB"
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+# define MODULE_PGSZ " 64KB"
+#else
+# define MODULE_PGSZ ""
+#endif
+
+/* We don't really support no-SMP so tag if someone tries. */
+#ifdef CONFIG_SMP
+#define MODULE_NOSMP ""
+#else
+#define MODULE_NOSMP " nosmp"
+#endif
+
+#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP
+
+#endif /* _ASM_TILE_MODULE_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index db93518fac0..67276800861 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,8 +20,17 @@
#include <arch/chip.h>
/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
-#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+#if defined(CONFIG_PAGE_SIZE_16KB)
+#define PAGE_SHIFT 14
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+#define PAGE_SHIFT 16
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K
+#else
+#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
+#define CTX_PAGE_FLAG 0
+#endif
+#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
@@ -30,6 +39,12 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
/*
+ * We do define AT_SYSINFO_EHDR to support vDSO,
+ * but don't use the gate mechanism.
+ */
+#define __HAVE_ARCH_GATE_AREA 1
+
+/*
* If the Kconfig doesn't specify, set a maximum zone order that
* is enough so that we can create huge pages from small pages given
* the respective sizes of the two page types. See <linux/mmzone.h>.
@@ -78,8 +93,7 @@ typedef HV_PTE pgprot_t;
/*
* User L2 page tables are managed as one L2 page table per page,
* because we use the page allocator for them. This keeps the allocation
- * simple and makes it potentially useful to implement HIGHPTE at some point.
- * However, it's also inefficient, since L2 page tables are much smaller
+ * simple, but it's also inefficient, since L2 page tables are much smaller
* than pages (currently 2KB vs 64KB). So we should revisit this.
*/
typedef struct page *pgtable_t;
@@ -128,14 +142,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-#define HUGE_MAX_HSTATE 2
+#define HUGE_MAX_HSTATE 6
#ifdef CONFIG_HUGETLB_PAGE
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
+/* Allow overriding how much VA or PA the kernel will use. */
+#define MAX_PA_WIDTH CHIP_PA_WIDTH()
+#define MAX_VA_WIDTH CHIP_VA_WIDTH()
+
/* Each memory controller has PAs distinct in their high bits. */
-#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS())
#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
@@ -146,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
* We reserve the lower half of memory for user-space programs, and the
* upper half for system code. We re-map all of physical memory in the
* upper half, which takes a quarter of our VA space. Then we have
- * the vmalloc regions. The supervisor code lives at 0xfffffff700000000,
+ * the vmalloc regions. The supervisor code lives at the highest address,
* with the hypervisor above that.
*
* Loadable kernel modules are placed immediately after the static
@@ -158,27 +176,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
* Similarly, for now we don't play any struct page mapping games.
*/
-#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH
# error Too much PA to map with the VA available!
#endif
-#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
-
-#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
-#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
-#define PAGE_OFFSET MEM_HIGH_START
-#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */
-#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
-#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
-#define MEM_SV_INTRPT MEM_SV_START
-#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */
-#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
-#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */
-
-/* Highest DTLB address we will use */
-#define KERNEL_HIGH_VADDR MEM_SV_START
-/* Since we don't currently provide any fixmaps, we use an impossible VA. */
-#define FIXADDR_TOP MEM_HV_START
+#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1)))
+#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */
+#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
+#define _VMALLOC_START FIXADDR_TOP
+#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */
+#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */
+#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
#else /* !__tilegx__ */
@@ -200,25 +209,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
* values, and after that, we show "typical" values, since the actual
* addresses depend on kernel #defines.
*
- * MEM_HV_INTRPT 0xfe000000
- * MEM_SV_INTRPT (kernel code) 0xfd000000
+ * MEM_HV_START 0xfe000000
+ * MEM_SV_START (kernel code) 0xfd000000
* MEM_USER_INTRPT (user vector) 0xfc000000
- * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR)
- * PKMAP_BASE 0xf7000000 (via LAST_PKMAP)
- * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
- * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE)
+ * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE 0xf9000000 (via LAST_PKMAP)
+ * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE)
* mapped LOWMEM 0xc0000000
*/
#define MEM_USER_INTRPT _AC(0xfc000000, UL)
-#if CONFIG_KERNEL_PL == 1
-#define MEM_SV_INTRPT _AC(0xfd000000, UL)
-#define MEM_HV_INTRPT _AC(0xfe000000, UL)
-#else
-#define MEM_GUEST_INTRPT _AC(0xfd000000, UL)
-#define MEM_SV_INTRPT _AC(0xfe000000, UL)
-#define MEM_HV_INTRPT _AC(0xff000000, UL)
-#endif
+#define MEM_SV_START _AC(0xfd000000, UL)
+#define MEM_HV_START _AC(0xfe000000, UL)
#define INTRPT_SIZE 0x4000
@@ -239,7 +241,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
#endif /* __tilegx__ */
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD)
#ifdef CONFIG_HIGHMEM
@@ -325,6 +327,7 @@ static inline int pfn_valid(unsigned long pfn)
struct mm_struct;
extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+extern pte_t *virt_to_kpte(unsigned long kaddr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 5d5a635530b..dfedd7ac729 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,9 +15,12 @@
#ifndef _ASM_TILE_PCI_H
#define _ASM_TILE_PCI_H
+#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <asm-generic/pci_iomap.h>
+#ifndef __tilegx__
+
/*
* Structure of a PCI controller (host bridge)
*/
@@ -25,7 +28,6 @@ struct pci_controller {
int index; /* PCI domain number */
struct pci_bus *root_bus;
- int first_busno;
int last_busno;
int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
@@ -41,20 +43,161 @@ struct pci_controller {
};
/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int tile_plx_gen1;
+
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
+#define TILE_NUM_PCIE 2
+
+/*
* The hypervisor maps the entirety of CPA-space as bus addresses, so
* bus addresses are physical addresses. The networking and block
* device layers use this boolean for bounce buffer decisions.
*/
#define PCI_DMA_BUS_IS_PHYS 1
-int __devinit tile_pci_init(void);
-int __devinit pcibios_init(void);
+/* generic pci stuff */
+#include <asm-generic/pci.h>
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+#else
-void __devinit pcibios_fixup_bus(struct pci_bus *bus);
+#include <asm/page.h>
+#include <gxio/trio.h>
-#define TILE_NUM_PCIE 2
+/**
+ * We reserve the hugepage-size address range at the top of the 64-bit address
+ * space to serve as the PCI window, emulating the BAR0 space of an endpoint
+ * device. This window is used by the chip-to-chip applications running on
+ * the RC node. The reason for carving out this window is that Mem-Maps that
+ * back up this window will not overlap with those that map the real physical
+ * memory.
+ */
+#define PCIE_HOST_BAR0_SIZE HPAGE_SIZE
+#define PCIE_HOST_BAR0_START HPAGE_MASK
+
+/**
+ * The first PAGE_SIZE of the above "BAR" window is mapped to the
+ * gxpci_host_regs structure.
+ */
+#define PCIE_HOST_REGS_SIZE PAGE_SIZE
+
+/*
+ * This is the PCI address where the Mem-Map interrupt regions start.
+ * We use the 2nd to the last huge page of the 64-bit address space.
+ * The last huge page is used for the rootcomplex "bar", for C2C purpose.
+ */
+#define MEM_MAP_INTR_REGIONS_BASE (HPAGE_MASK - HPAGE_SIZE)
+
+/*
+ * Each Mem-Map interrupt region occupies 4KB.
+ */
+#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+ (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ *
+ * This design lets us avoid the "PCI hole" problem where the host bridge
+ * won't pass DMA traffic with target addresses that happen to fall within the
+ * BAR space. This enables us to use all the physical memory for DMA, instead
+ * of wasting the same amount of physical memory as the BAR window size.
+ */
+#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Start of the PCI memory resource, which starts at the end of the
+ * maximum system physical RAM address.
+ */
+#define TILE_PCI_MEM_START (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Structure of a PCI controller (host bridge) on Gx.
+ */
+struct pci_controller {
+
+ /* Pointer back to the TRIO that this PCIe port is connected to. */
+ gxio_trio_context_t *trio;
+ int mac; /* PCIe mac index on the TRIO shim */
+ int trio_index; /* Index of TRIO shim that contains the MAC. */
+
+ int pio_mem_index; /* PIO region index for memory access */
+
+#ifdef CONFIG_TILE_PCI_IO
+ int pio_io_index; /* PIO region index for I/O space access */
+#endif
+
+ /*
+ * Mem-Map regions for all the memory controllers so that Linux can
+ * map all of its physical memory space to the PCI bus.
+ */
+ int mem_maps[MAX_NUMNODES];
+
+ int index; /* PCI domain number */
+ struct pci_bus *root_bus;
+
+ /* PCI I/O space resource for this controller. */
+ struct resource io_space;
+ char io_space_name[32];
+
+ /* PCI memory space resource for this controller. */
+ struct resource mem_space;
+ char mem_space_name[32];
+
+ uint64_t mem_offset; /* cpu->bus memory mapping offset. */
+
+ int first_busno;
+
+ struct pci_ops *ops;
+
+ /* Table that maps the INTx numbers to Linux irq numbers. */
+ int irq_intx_table[4];
+};
+
+extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
+extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
+extern int num_trio_shims;
+
+extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
+
+/*
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU). The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 0
+
+#endif /* __tilegx__ */
+
+int __init tile_pci_init(void);
+int __init pcibios_init(void);
+
+void pcibios_fixup_bus(struct pci_bus *bus);
#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
@@ -77,13 +220,8 @@ static inline int pcibios_assign_all_busses(void)
}
#define PCIBIOS_MIN_MEM 0
-#define PCIBIOS_MIN_IO 0
-
-/*
- * This flag tells if the platform is TILEmpower that needs
- * special configuration for the PLX switch chip.
- */
-extern int tile_plx_gen1;
+/* Minimum PCI I/O address, starting at the page boundary. */
+#define PCIBIOS_MIN_IO PAGE_SIZE
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
@@ -91,7 +229,4 @@ extern int tile_plx_gen1;
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
index 63294f5a8ef..4f7ae39fa20 100644
--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
#ifndef _ASM_TILE_PERCPU_H
#define _ASM_TILE_PERCPU_H
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+ unsigned long tp;
+ register unsigned long *sp asm("sp");
+ asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+ return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
#include <asm-generic/percpu.h>
diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h
new file mode 100644
index 00000000000..59c5b164e5b
--- /dev/null
+++ b/arch/tile/include/asm/perf_event.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PERF_EVENT_H
+#define _ASM_TILE_PERF_EVENT_H
+
+#include <linux/percpu.h>
+DECLARE_PER_CPU(u64, perf_irqs);
+
+unsigned long handle_syscall_link_address(void);
+#endif /* _ASM_TILE_PERF_EVENT_H */
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index e919c0bdc22..1b902508b66 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -19,24 +19,24 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <asm/fixmap.h>
+#include <asm/page.h>
#include <hv/hypervisor.h>
/* Bits for the size of the second-level page table. */
-#define L2_KERNEL_PGTABLE_SHIFT \
- (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+#define L2_KERNEL_PGTABLE_SHIFT _HV_LOG2_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1UL << L2_KERNEL_PGTABLE_SHIFT)
/* We currently allocate user L2 page tables by page (unlike kernel L2s). */
-#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
-#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#if L2_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L2_USER_PGTABLE_SHIFT PAGE_SHIFT
#else
#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
#endif
/* How many pages do we need, as an "order", for a user L2 page table? */
-#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
-
-/* How big is a kernel L2 page table? */
-#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - PAGE_SHIFT)
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
@@ -50,14 +50,14 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *ptep)
{
- set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+ set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(__pa(ptep)),
__pgprot(_PAGE_PRESENT)));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t page)
{
- set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+ set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(PFN_PHYS(page_to_pfn(page))),
__pgprot(_PAGE_PRESENT)));
}
@@ -68,8 +68,20 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern pgtable_t pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
+ int order);
+extern void pgtable_free(struct mm_struct *mm, struct page *pte, int order);
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+ pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER);
+}
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -85,8 +97,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
pte_free(mm, virt_to_page(pte));
}
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
- unsigned long address);
+extern void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address, int order);
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address)
+{
+ __pgtable_free_tlb(tlb, pte, address, L2_USER_PGTABLE_ORDER);
+}
#define check_pgt_cache() do { } while (0)
@@ -104,19 +121,44 @@ void shatter_pmd(pmd_t *pmd);
void shatter_huge_page(unsigned long addr);
#ifdef __tilegx__
-/* We share a single page allocator for both L1 and L2 page tables. */
-#if HV_L1_SIZE != HV_L2_SIZE
-# error Rework assumption that L1 and L2 page tables are same size.
-#endif
-#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+
#define pud_populate(mm, pud, pmd) \
pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
-#define pmd_alloc_one(mm, addr) \
- ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr))))
-#define pmd_free(mm, pmdp) \
- pte_free((mm), virt_to_page(pmdp))
-#define __pmd_free_tlb(tlb, pmdp, address) \
- __pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+
+/* Bits for the size of the L1 (intermediate) page table. */
+#define L1_KERNEL_PGTABLE_SHIFT _HV_LOG2_L1_SIZE(HPAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L1_KERNEL_PGTABLE_SIZE (1UL << L1_KERNEL_PGTABLE_SHIFT)
+
+/* We currently allocate L1 page tables by page. */
+#if L1_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L1_USER_PGTABLE_SHIFT PAGE_SHIFT
+#else
+#define L1_USER_PGTABLE_SHIFT L1_KERNEL_PGTABLE_SHIFT
#endif
+/* How many pages do we need, as an "order", for an L1 page table? */
+#define L1_USER_PGTABLE_ORDER (L1_USER_PGTABLE_SHIFT - PAGE_SHIFT)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ struct page *p = pgtable_alloc_one(mm, address, L1_USER_PGTABLE_ORDER);
+ return (pmd_t *)page_to_virt(p);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_free(mm, virt_to_page(pmdp), L1_USER_PGTABLE_ORDER);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+ unsigned long address)
+{
+ __pgtable_free_tlb(tlb, virt_to_page(pmdp), address,
+ L1_USER_PGTABLE_ORDER);
+}
+
+#endif /* __tilegx__ */
+
#endif /* _ASM_TILE_PGALLOC_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 1a20b7ef8ea..33587f16c15 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -27,9 +27,10 @@
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/pfn.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
-#include <asm/system.h>
+#include <asm/page.h>
struct mm_struct;
struct vm_area_struct;
@@ -70,6 +71,7 @@ extern void set_page_homes(void);
#define _PAGE_PRESENT HV_PTE_PRESENT
#define _PAGE_HUGE_PAGE HV_PTE_PAGE
+#define _PAGE_SUPER_PAGE HV_PTE_SUPER
#define _PAGE_READABLE HV_PTE_READABLE
#define _PAGE_WRITABLE HV_PTE_WRITABLE
#define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE
@@ -86,6 +88,7 @@ extern void set_page_homes(void);
#define _PAGE_ALL (\
_PAGE_PRESENT | \
_PAGE_HUGE_PAGE | \
+ _PAGE_SUPER_PAGE | \
_PAGE_READABLE | \
_PAGE_WRITABLE | \
_PAGE_EXECUTABLE | \
@@ -163,7 +166,7 @@ extern void set_page_homes(void);
(pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
/* Just setting the PFN to zero suffices. */
-#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+#define pte_pgprot(x) hv_pte_set_pa((x), 0)
/*
* For PTEs and PDEs, we must clear the Present bit first when
@@ -188,6 +191,7 @@ static inline void __pte_clear(pte_t *ptep)
* Undefined behaviour if not..
*/
#define pte_present hv_pte_get_present
+#define pte_mknotpresent hv_pte_clear_present
#define pte_user hv_pte_get_user
#define pte_read hv_pte_get_readable
#define pte_dirty hv_pte_get_dirty
@@ -195,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep)
#define pte_write hv_pte_get_writable
#define pte_exec hv_pte_get_executable
#define pte_huge hv_pte_get_page
+#define pte_super hv_pte_get_super
#define pte_rdprotect hv_pte_clear_readable
#define pte_exprotect hv_pte_clear_executable
#define pte_mkclean hv_pte_clear_dirty
@@ -207,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep)
#define pte_mkyoung hv_pte_set_accessed
#define pte_mkwrite hv_pte_set_writable
#define pte_mkhuge hv_pte_set_page
+#define pte_mksuper hv_pte_set_super
#define pte_special(pte) 0
#define pte_mkspecial(pte) (pte)
@@ -262,7 +268,7 @@ static inline int pte_none(pte_t pte)
static inline unsigned long pte_pfn(pte_t pte)
{
- return hv_pte_get_pfn(pte);
+ return PFN_DOWN(hv_pte_get_pa(pte));
}
/* Set or get the remote cache cpu in a pgprot with remote caching. */
@@ -271,7 +277,7 @@ extern int get_remote_cache_cpu(pgprot_t prot);
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
- return hv_pte_set_pfn(prot, pfn);
+ return hv_pte_set_pa(prot, PFN_PHYS(pfn));
}
/* Support for priority mappings. */
@@ -313,7 +319,7 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
*/
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return pfn_pte(hv_pte_get_pfn(pte), newprot);
+ return pfn_pte(pte_pfn(pte), newprot);
}
/*
@@ -336,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-#if defined(CONFIG_HIGHPTE)
-extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
-#define pte_unmap(pte) kunmap_atomic(pte)
-#else
#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
#define pte_unmap(pte) do { } while (0)
-#endif
/* Clear a non-executable kernel PTE and flush it from the TLB. */
#define kpte_clear_flush(ptep, vaddr) \
@@ -361,9 +362,6 @@ do { \
#define kern_addr_valid(addr) (1)
#endif /* CONFIG_FLATMEM */
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
extern void vmalloc_sync_all(void);
#endif /* !__ASSEMBLY__ */
@@ -411,6 +409,46 @@ static inline unsigned long pmd_index(unsigned long address)
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return ptep_test_and_clear_young(vma, address, pmdp_ptep(pmdp));
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ ptep_set_wrprotect(mm, address, pmdp_ptep(pmdp));
+}
+
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp)));
+}
+
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ set_pte(pmdp_ptep(pmdp), pmd_pte(pmdval));
+}
+
+#define set_pmd_at(mm, addr, pmdp, pmdval) __set_pmd(pmdp, pmdval)
+
+/* Create a pmd from a PTFN. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+ return pte_pmd(hv_pte_set_ptfn(prot, ptfn));
+}
+
+/* Return the page-table frame number (ptfn) that a pmd_t points at. */
+#define pmd_ptfn(pmd) hv_pte_get_ptfn(pmd_pte(pmd))
+
/*
* A given kernel pmd_t maps to a specific virtual address (either a
* kernel huge page or a kernel pte_t table). Since kernel pte_t
@@ -431,7 +469,48 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* OK for pte_lockptr(), since we just end up with potentially one
* lock being used for several pte_t arrays.
*/
-#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd)))
+#define pmd_page(pmd) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pmd_ptfn(pmd))))
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ __pte_clear(pmdp_ptep(pmdp));
+}
+
+#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
+#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
+#define __HAVE_ARCH_PMD_WRITE
+
+#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
+#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pfn_pmd(pmd_pfn(pmd), newprot);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define has_transparent_hugepage() 1
+#define pmd_trans_huge pmd_huge_page
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ return pte_pmd(hv_pte_set_client2(pmd_pte(pmd)));
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return hv_pte_get_client2(pmd_pte(pmd));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
@@ -449,17 +528,13 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}
-static inline int pmd_huge_page(pmd_t pmd)
-{
- return pmd_val(pmd) & _PAGE_HUGE_PAGE;
-}
-
#include <asm-generic/pgtable.h>
/* Support /proc/NN/pgtable API. */
struct seq_file;
int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
- unsigned long vaddr, pte_t *ptep, void **datap);
+ unsigned long vaddr, unsigned long pagesize,
+ pte_t *ptep, void **datap);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 9f98529761f..d26a4227903 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -20,11 +20,12 @@
* The level-1 index is defined by the huge page size. A PGD is composed
* of PTRS_PER_PGD pgd_t's and is the top level of the page table.
*/
-#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE
-#define PGDIR_SIZE HV_PAGE_SIZE_LARGE
+#define PGDIR_SHIFT HPAGE_SHIFT
+#define PGDIR_SIZE HPAGE_SIZE
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
-#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+#define PTRS_PER_PGD _HV_L1_ENTRIES(HPAGE_SHIFT)
+#define PGD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
+#define SIZEOF_PGD _HV_L1_SIZE(HPAGE_SHIFT)
/*
* The level-2 index is defined by the difference between the huge
@@ -33,8 +34,9 @@
* Note that the hypervisor docs use PTE for what we call pte_t, so
* this nomenclature is somewhat confusing.
*/
-#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
-#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
+#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
+#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
#ifndef __ASSEMBLY__
@@ -53,17 +55,9 @@
#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
#ifdef CONFIG_HIGHMEM
-# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
+# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
#else
-# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1))
-#endif
-
-#ifdef CONFIG_HUGEVMAP
-#define HUGE_VMAP_END __VMAPPING_END
-#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
-#define _VMALLOC_END HUGE_VMAP_BASE
-#else
-#define _VMALLOC_END __VMAPPING_END
+# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1))
#endif
/*
@@ -82,10 +76,12 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
/* We have no pmd or pud since we are strictly a two-level page table */
#include <asm-generic/pgtable-nopmd.h>
+static inline int pud_huge_page(pud_t pud) { return 0; }
+
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_INTRPT;
+ return addr >= MEM_HV_START;
}
/*
@@ -111,24 +107,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
-}
-
-/* Create a pmd from a PTFN. */
-static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
-{
- return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } };
-}
-
-/* Return the page-table frame number (ptfn) that a pmd_t points at. */
-#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- __pte_clear(&pmdp->pud.pgd);
-}
+/*
+ * pmds are wrappers around pgds, which are the same as ptes.
+ * It's often convenient to "cast" back and forth and use the pte methods,
+ * which are the methods supplied by the hypervisor.
+ */
+#define pmd_pte(pmd) ((pmd).pud.pgd)
+#define pmdp_ptep(pmdp) (&(pmdp)->pud.pgd)
+#define pte_pmd(pte) ((pmd_t){ { (pte) } })
#endif /* __ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index fd80328523b..2c8a9cd102d 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -21,17 +21,19 @@
#define PGDIR_SIZE HV_L1_SPAN
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD HV_L0_ENTRIES
-#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_INDEX(va) HV_L0_INDEX(va)
+#define SIZEOF_PGD HV_L0_SIZE
/*
* The level-1 index is defined by the huge page size. A PMD is composed
* of PTRS_PER_PMD pgd_t's and is the middle level of the page table.
*/
-#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE
-#define PMD_SIZE HV_PAGE_SIZE_LARGE
+#define PMD_SHIFT HPAGE_SHIFT
+#define PMD_SIZE HPAGE_SIZE
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT))
-#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t))
+#define PTRS_PER_PMD _HV_L1_ENTRIES(HPAGE_SHIFT)
+#define PMD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
+#define SIZEOF_PMD _HV_L1_SIZE(HPAGE_SHIFT)
/*
* The level-2 index is defined by the difference between the huge
@@ -40,25 +42,34 @@
* Note that the hypervisor docs use PTE for what we call pte_t, so
* this nomenclature is somewhat confusing.
*/
-#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
-#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
+#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
+#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
/*
- * Align the vmalloc area to an L2 page table, and leave a guard page
- * at the beginning and end. The vmalloc code also puts in an internal
+ * Align the vmalloc area to an L2 page table. Omit guard pages at
+ * the beginning and end for simplicity (particularly in the per-cpu
+ * memory allocation code). The vmalloc code puts in an internal
* guard page between each allocation.
*/
-#define _VMALLOC_END HUGE_VMAP_BASE
-#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE)
-#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE)
-
-#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
+#define _VMALLOC_END MEM_SV_START
+#define VMALLOC_END _VMALLOC_END
+#define VMALLOC_START _VMALLOC_START
#ifndef __ASSEMBLY__
/* We have no pud since we are a three-level page table. */
#include <asm-generic/pgtable-nopud.h>
+/*
+ * pmds are the same as pgds and ptes, so converting is a no-op.
+ */
+#define pmd_pte(pmd) (pmd)
+#define pmdp_ptep(pmdp) (pmdp)
+#define pte_pmd(pte) (pte)
+
+#define pud_pte(pud) ((pud).pgd)
+
static inline int pud_none(pud_t pud)
{
return pud_val(pud) == 0;
@@ -69,6 +80,11 @@ static inline int pud_present(pud_t pud)
return pud_val(pud) & _PAGE_PRESENT;
}
+static inline int pud_huge_page(pud_t pud)
+{
+ return pud_val(pud) & _PAGE_HUGE_PAGE;
+}
+
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -85,6 +101,9 @@ static inline int pud_bad(pud_t pud)
/* Return the page-table frame number (ptfn) that a pud_t points at. */
#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
+/* Return the page frame number (pfn) that a pud_t points at. */
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+
/*
* A given kernel pud_t maps to a kernel pmd_t table at a specific
* virtual address. Since kernel pmd_t tables can be aligned at
@@ -98,7 +117,7 @@ static inline int pud_bad(pud_t pud)
* A pud_t points to a pmd_t array. Since we can have multiple per
* page, we don't have a one-to-one mapping of pud_t's to pages.
*/
-#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))
+#define pud_page(pud) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pud_ptfn(pud))))
static inline unsigned long pud_index(unsigned long address)
{
@@ -108,28 +127,6 @@ static inline unsigned long pud_index(unsigned long address)
#define pmd_offset(pud, address) \
((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
-static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_pte(pmdp, pmdval);
-}
-
-/* Create a pmd from a PTFN and pgprot. */
-static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
-{
- return hv_pte_set_ptfn(prot, ptfn);
-}
-
-/* Return the page-table frame number (ptfn) that a pmd_t points at. */
-static inline unsigned long pmd_ptfn(pmd_t pmd)
-{
- return hv_pte_get_ptfn(pmd);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- __pte_clear(pmdp);
-}
-
/* Normalize an address to having the correct high bits set. */
#define pgd_addr_normalize pgd_addr_normalize
static inline unsigned long pgd_addr_normalize(unsigned long addr)
@@ -141,8 +138,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr)
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_START ||
- (addr > MEM_LOW_END && addr < MEM_HIGH_START);
+ return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr);
}
/*
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h
new file mode 100644
index 00000000000..7ae3956d900
--- /dev/null
+++ b/arch/tile/include/asm/pmc.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PMC_H
+#define _ASM_TILE_PMC_H
+
+#include <linux/ptrace.h>
+
+#define TILE_BASE_COUNTERS 2
+
+/* Bitfields below are derived from SPR PERF_COUNT_CTL*/
+#ifndef __tilegx__
+/* PERF_COUNT_CTL on TILEPro */
+#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK 0x7f /* 7 bits for event SEL,
+ COUNT_0_SEL */
+#define TILE_PLM_MASK 0x780 /* 4 bits priv level msks,
+ COUNT_0_MASK*/
+#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK)
+
+#else /* __tilegx__*/
+/* PERF_COUNT_CTL on TILEGx*/
+#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK 0x3f /* 6 bits for event SEL,
+ COUNT_0_SEL*/
+#define TILE_BOX_MASK 0x1c0 /* 3 bits box msks,
+ COUNT_0_BOX */
+#define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks,
+ COUNT_0_MASK */
+#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK)
+#endif /* __tilegx__*/
+
+/* Takes register and fault number. Returns error to disable the interrupt. */
+typedef int (*perf_irq_t)(struct pt_regs *, int);
+
+int userspace_perf_handler(struct pt_regs *regs, int fault);
+
+perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq);
+void release_pmc_hardware(void);
+
+unsigned long pmc_get_overflow(void);
+void pmc_ack_overflow(unsigned long status);
+
+void unmask_pmc_interrupts(void);
+void mask_pmc_interrupts(void);
+
+#endif /* _ASM_TILE_PMC_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 34c1e01ffb5..42323636c45 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -15,6 +15,8 @@
#ifndef _ASM_TILE_PROCESSOR_H
#define _ASM_TILE_PROCESSOR_H
+#include <arch/chip.h>
+
#ifndef __ASSEMBLY__
/*
@@ -25,7 +27,6 @@
#include <asm/ptrace.h>
#include <asm/percpu.h>
-#include <arch/chip.h>
#include <arch/spr_def.h>
struct task_struct;
@@ -76,6 +77,17 @@ struct async_tlb {
#ifdef CONFIG_HARDWALL
struct hardwall_info;
+struct hardwall_task {
+ /* Which hardwall is this task tied to? (or NULL if none) */
+ struct hardwall_info *info;
+ /* Chains this task into the list at info->task_head. */
+ struct list_head list;
+};
+#ifdef __tilepro__
+#define HARDWALL_TYPES 1 /* udn */
+#else
+#define HARDWALL_TYPES 3 /* udn, idn, and ipi */
+#endif
#endif
struct thread_struct {
@@ -99,47 +111,38 @@ struct thread_struct {
unsigned long long interrupt_mask;
/* User interrupt-control 0 state */
unsigned long intctrl_0;
-#if CHIP_HAS_PROC_STATUS_SPR()
+ /* Is this task currently doing a backtrace? */
+ bool in_backtrace;
/* Any other miscellaneous processor state bits */
unsigned long proc_status;
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
/* Interrupt base for PL0 interrupts */
unsigned long interrupt_vector_base;
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
/* Tile cache retry fifo high-water mark */
unsigned long tile_rtf_hwm;
-#endif
#if CHIP_HAS_DSTREAM_PF()
/* Data stream prefetch control */
unsigned long dstream_pf;
#endif
#ifdef CONFIG_HARDWALL
- /* Is this task tied to an activated hardwall? */
- struct hardwall_info *hardwall;
- /* Chains this task into the list at hardwall->list. */
- struct list_head hardwall_list;
+ /* Hardwall information for various resources. */
+ struct hardwall_task hardwall[HARDWALL_TYPES];
#endif
#if CHIP_HAS_TILE_DMA()
/* Async DMA TLB fault information */
struct async_tlb dma_async_tlb;
#endif
-#if CHIP_HAS_SN_PROC()
- /* Was static network processor when we were switched out? */
- int sn_proc_running;
- /* Async SNI TLB fault information */
- struct async_tlb sn_async_tlb;
-#endif
};
#endif /* !__ASSEMBLY__ */
/*
* Start with "sp" this many bytes below the top of the kernel stack.
- * This preserves the invariant that a called function may write to *sp.
+ * This allows us to be cache-aware when handling the initial save
+ * of the pt_regs value to the stack.
*/
-#define STACK_TOP_DELTA 8
+#define STACK_TOP_DELTA 64
/*
* When entering the kernel via a fault, start with the top of the
@@ -155,7 +158,7 @@ struct thread_struct {
#ifndef __ASSEMBLY__
#ifdef __tilegx__
-#define TASK_SIZE_MAX (MEM_LOW_END + 1)
+#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1))
#else
#define TASK_SIZE_MAX PAGE_OFFSET
#endif
@@ -169,10 +172,10 @@ struct thread_struct {
#define TASK_SIZE TASK_SIZE_MAX
#endif
-/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
-#define VDSO_BASE (TASK_SIZE - PAGE_SIZE)
+#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base)
+#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x))
-#define STACK_TOP VDSO_BASE
+#define STACK_TOP TASK_SIZE
/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
#define STACK_TOP_MAX TASK_SIZE_MAX
@@ -202,6 +205,7 @@ static inline void start_thread(struct pt_regs *regs,
{
regs->pc = pc;
regs->sp = usp;
+ single_step_execve();
}
/* Free all resources held by a thread. */
@@ -210,35 +214,40 @@ static inline void release_thread(struct task_struct *dead_task)
/* Nothing for now */
}
-/* Prepare to copy thread state - unlazy all lazy status. */
-#define prepare_to_copy(tsk) do { } while (0)
-
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
extern int do_work_pending(struct pt_regs *regs, u32 flags);
/*
* Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
*/
#define thread_saved_pc(t) ((t)->thread.pc)
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
-#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+#define task_ksp0(task) \
+ ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA)
/* Return some info about the user process TASK. */
-#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
#define task_pt_regs(task) \
- ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+ ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define current_pt_regs() \
+ ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+ STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1)
#define task_sp(task) (task_pt_regs(task)->sp)
#define task_pc(task) (task_pt_regs(task)->pc)
/* Aliases for pc and sp (used in fs/proc/array.c) */
#define KSTK_EIP(task) task_pc(task)
#define KSTK_ESP(task) task_sp(task)
+/* Fine-grained unaligned JIT support */
+#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
+#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
+
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
/* Standard format for printing registers and other word-size data. */
#ifdef __tilegx__
# define REGFMT "0x%016lx"
@@ -267,7 +276,6 @@ extern char chip_model[64];
/* Data on which physical memory controller corresponds to which NUMA node. */
extern int node_controller[];
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Does the heap allocator return hash-for-home pages by default? */
extern int hash_default;
@@ -277,11 +285,6 @@ extern int kstack_hash;
/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
#define uheap_hash hash_default
-#else
-#define hash_default 0
-#define kstack_hash 0
-#define uheap_hash 0
-#endif
/* Are we using huge pages in the TLB for kernel data? */
extern int kdata_huge;
@@ -329,7 +332,6 @@ extern int kdata_huge;
/*
* Provide symbolic constants for PLs.
- * Note that assembly code assumes that USER_PL is zero.
*/
#define USER_PL 0
#if CONFIG_KERNEL_PL == 2
@@ -338,20 +340,38 @@ extern int kdata_huge;
#define KERNEL_PL CONFIG_KERNEL_PL
/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
-#define CPU_LOG_MASK_VALUE 12
-#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
-#if CONFIG_NR_CPUS > CPU_MASK_VALUE
-# error Too many cpus!
+#ifdef __tilegx__
+#define CPU_SHIFT 48
+#if CHIP_VA_WIDTH() > CPU_SHIFT
+# error Too many VA bits!
#endif
+#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1)
#define raw_smp_processor_id() \
- ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+ ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT))
#define get_current_ksp0() \
- (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+ ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \
+ (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT)))
+#define next_current_ksp0(task) ({ \
+ unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \
+ unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \
+ __ksp0 | __cpu; \
+})
+#else
+#define LOG2_NR_CPU_IDS 6
+#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1)
+#define raw_smp_processor_id() \
+ ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID)
+#define get_current_ksp0() \
+ (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID)
#define next_current_ksp0(task) ({ \
unsigned long __ksp0 = task_ksp0(task); \
int __cpu = raw_smp_processor_id(); \
- BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+ BUG_ON(__ksp0 & MAX_CPU_ID); \
__ksp0 | __cpu; \
})
+#endif
+#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1)
+# error Too many cpus!
+#endif
#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index c6cddd7e8d5..b9620c077ab 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -11,87 +11,20 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_PTRACE_H
#define _ASM_TILE_PTRACE_H
-#include <arch/chip.h>
-#include <arch/abi.h>
-
-/* These must match struct pt_regs, below. */
-#if CHIP_WORD_SIZE() == 32
-#define PTREGS_OFFSET_REG(n) ((n)*4)
-#else
-#define PTREGS_OFFSET_REG(n) ((n)*8)
-#endif
-#define PTREGS_OFFSET_BASE 0
-#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53)
-#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54)
-#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55)
-#define PTREGS_NR_GPRS 56
-#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56)
-#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57)
-#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58)
-#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59)
-#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60)
-#if CHIP_HAS_CMPEXCH()
-#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61)
-#endif
-#define PTREGS_SIZE PTREGS_OFFSET_REG(64)
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
-
-#ifdef __KERNEL__
/* Benefit from consistent use of "long" on all chips. */
typedef unsigned long pt_reg_t;
-#else
-/* Provide appropriate length type to userspace regardless of -m32/-m64. */
-typedef uint_reg_t pt_reg_t;
-#endif
-
-/*
- * This struct defines the way the registers are stored on the stack during a
- * system call or exception. "struct sigcontext" has the same shape.
- */
-struct pt_regs {
- /* Saved main processor registers; 56..63 are special. */
- /* tp, sp, and lr must immediately follow regs[] for aliasing. */
- pt_reg_t regs[53];
- pt_reg_t tp; /* aliases regs[TREG_TP] */
- pt_reg_t sp; /* aliases regs[TREG_SP] */
- pt_reg_t lr; /* aliases regs[TREG_LR] */
-
- /* Saved special registers. */
- pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */
- pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
- pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */
- pt_reg_t orig_r0; /* r0 at syscall entry, else zero */
- pt_reg_t flags; /* flags (see below) */
-#if !CHIP_HAS_CMPEXCH()
- pt_reg_t pad[3];
-#else
- pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */
- pt_reg_t pad[2];
#endif
-};
-
-#endif /* __ASSEMBLY__ */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
+#include <uapi/asm/ptrace.h>
-/* Support TILE-specific ptrace options, with events starting at 16. */
-#define PTRACE_O_TRACEMIGRATE 0x00010000
-#define PTRACE_EVENT_MIGRATE 16
-#ifdef __KERNEL__
#define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE)
-#define PT_TRACE_MIGRATE 0x00080000
-#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE)
-#endif
-
-#ifdef __KERNEL__
+#define PT_TRACE_MIGRATE PT_EVENT_FLAG(PTRACE_EVENT_MIGRATE)
/* Flag bits in pt_regs.flags */
#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */
@@ -100,17 +33,20 @@ struct pt_regs {
#ifndef __ASSEMBLY__
+#define regs_return_value(regs) ((regs)->regs[0])
#define instruction_pointer(regs) ((regs)->pc)
#define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(regs) ((regs)->sp)
/* Does the process account for user or for system time? */
-#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL)
/* Fill in a struct pt_regs with the current kernel registers. */
struct pt_regs *get_pt_regs(struct pt_regs *);
/* Trace the current syscall. */
-extern void do_syscall_trace(void);
+extern int do_syscall_trace_enter(struct pt_regs *regs);
+extern void do_syscall_trace_exit(struct pt_regs *regs);
#define arch_has_single_step() (1)
@@ -144,8 +80,7 @@ extern void single_step_execve(void);
struct task_struct;
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code);
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
#ifdef __tilegx__
/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
@@ -159,6 +94,4 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
#define SINGLESTEP_STATE_TARGET_LB 2
#define SINGLESTEP_STATE_TARGET_UB 7
-#endif /* !__KERNEL__ */
-
#endif /* _ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index d062d463fca..5d5d3b739a6 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -25,16 +25,22 @@ extern char _sinitdata[], _einitdata[];
/* Write-once data is writable only till the end of initialization. */
extern char __w1data_begin[], __w1data_end[];
+extern char vdso_start[], vdso_end[];
+#ifdef CONFIG_COMPAT
+extern char vdso32_start[], vdso32_end[];
+#endif
/* Not exactly sections, but PC comparison points in the code. */
extern char __rt_sigreturn[], __rt_sigreturn_end[];
-#ifndef __tilegx__
+#ifdef __tilegx__
+extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
+#else
extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
#endif
-/* Handle the discontiguity between _sdata and _stext. */
+/* Handle the discontiguity between _sdata and _text. */
static inline int arch_is_kernel_data(unsigned long addr)
{
return addr >= (unsigned long)_sdata &&
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index 7caf0f36b03..e98909033e5 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -11,26 +11,42 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_SETUP_H
#define _ASM_TILE_SETUP_H
-#define COMMAND_LINE_SIZE 2048
-
-#ifdef __KERNEL__
#include <linux/pfn.h>
#include <linux/init.h>
+#include <uapi/asm/setup.h>
/*
* Reserved space for vmalloc and iomap - defined in asm/page.h
*/
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
+int tile_console_write(const char *buf, int count);
void early_panic(const char *fmt, ...);
-void warn_early_printk(void);
-void __init disable_early_printk(void);
-#endif /* __KERNEL__ */
+/* Init-time routine to do tile-specific per-cpu setup. */
+void setup_cpu(int boot);
+
+/* User-level DMA management functions */
+void grant_dma_mpls(void);
+void restrict_dma_mpls(void);
+
+#ifdef CONFIG_HARDWALL
+/* User-level network management functions */
+void reset_network_state(void);
+struct task_struct;
+void hardwall_switch_tasks(struct task_struct *prev, struct task_struct *next);
+void hardwall_deactivate_all(struct task_struct *task);
+int hardwall_ipi_valid(int cpu);
+
+/* Hook hardwall code into changes in affinity. */
+#define arch_set_cpus_allowed(p, new_mask) do { \
+ if (!cpumask_equal(&p->cpus_allowed, new_mask)) \
+ hardwall_deactivate_all(p); \
+} while (0)
+#endif
#endif /* _ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index 1e5e49aad54..10e183de96d 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -11,19 +11,11 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_SIGNAL_H
#define _ASM_TILE_SIGNAL_H
-/* Do not notify a ptracer when this signal is handled. */
-#define SA_NOPTRACE 0x02000000u
-
-/* Used in earlier Tilera releases, so keeping for binary compatibility. */
-#define SA_RESTORER 0x04000000u
-
-#include <asm-generic/signal.h>
+#include <uapi/asm/signal.h>
-#if defined(__KERNEL__)
#if !defined(__ASSEMBLY__)
struct pt_regs;
int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
@@ -34,6 +26,4 @@ void signal_fault(const char *type, struct pt_regs *,
void trace_unhandled_signal(const char *type, struct pt_regs *regs,
unsigned long address, int signo);
#endif
-#endif
-
#endif /* _ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124ae4b1..9a326b64f7a 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
/* Boot a secondary cpu */
void online_secondary(void);
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
- void (*func)(void *), void *info, bool wait);
-
/* Topology of the supervisor tile grid, and coordinates of boot processor */
extern HV_Topology smp_topology;
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
#else /* !CONFIG_SMP */
-#define on_each_cpu_mask(mask, func, info, wait) \
- do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
#define smp_master_cpu 0
#define smp_height 1
#define smp_width 1
@@ -108,10 +101,8 @@ void print_disabled_cpus(void);
extern struct cpumask cpu_lotar_map;
#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Which processors are used for hash-for-home mapping */
extern struct cpumask hash_for_home_map;
-#endif
/* Which cpus can have their cache flushed by hv_flush_remote(). */
extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index a5e4208d34f..c0a77b38d39 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -19,7 +19,6 @@
#include <linux/atomic.h>
#include <asm/page.h>
-#include <asm/system.h>
#include <linux/compiler.h>
/*
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 72be5904e02..9a12b9c7e5d 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -27,7 +27,7 @@
* Return the "current" portion of a ticket lock value,
* i.e. the number that currently owns the lock.
*/
-static inline int arch_spin_current(u32 val)
+static inline u32 arch_spin_current(u32 val)
{
return val >> __ARCH_SPIN_CURRENT_SHIFT;
}
@@ -36,7 +36,7 @@ static inline int arch_spin_current(u32 val)
* Return the "next" portion of a ticket lock value,
* i.e. the number that the next task to try to acquire the lock will get.
*/
-static inline int arch_spin_next(u32 val)
+static inline u32 arch_spin_next(u32 val)
{
return val & __ARCH_SPIN_NEXT_MASK;
}
@@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__insn_mf();
- rw->lock = 0;
+ __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 4d97a2db932..0e9d382a2d4 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -25,7 +25,6 @@
struct KBacktraceIterator {
BacktraceIterator it;
struct task_struct *task; /* task we are backtracing */
- pte_t *pgtable; /* page table for user space access */
int end; /* iteration complete. */
int new_context; /* new context is starting */
int profile; /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
index 7535cf1a30e..92b271bd9eb 100644
--- a/arch/tile/include/asm/string.h
+++ b/arch/tile/include/asm/string.h
@@ -21,8 +21,10 @@
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_STRCHR
#define __HAVE_ARCH_STRLEN
+#define __HAVE_ARCH_STRNLEN
extern __kernel_size_t strlen(const char *);
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
extern char *strchr(const char *s, int c);
extern void *memchr(const void *s, int c, size_t n);
extern void *memset(void *, int, __kernel_size_t);
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
new file mode 100644
index 00000000000..b8f888cbe6b
--- /dev/null
+++ b/arch/tile/include/asm/switch_to.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SWITCH_TO_H
+#define _ASM_TILE_SWITCH_TO_H
+
+#include <arch/sim_def.h>
+
+/*
+ * switch_to(n) should switch tasks to task nr n, first
+ * checking that n isn't the current task, in which case it does nothing.
+ * The number of callee-saved registers saved on the kernel stack
+ * is defined here for use in copy_thread() and must agree with __switch_to().
+ */
+#define CALLEE_SAVED_FIRST_REG 30
+#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+/*
+ * Pause the DMA engine and static network before task switching.
+ */
+#define prepare_arch_switch(next) _prepare_arch_switch(next)
+void _prepare_arch_switch(struct task_struct *next);
+
+struct task_struct;
+#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
+extern struct task_struct *_switch_to(struct task_struct *prev,
+ struct task_struct *next);
+
+/* Helper function for _switch_to(). */
+extern struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long new_system_save_k_0);
+
+/* Address that switched-away from tasks are at. */
+extern unsigned long get_switch_to_pc(void);
+
+/*
+ * Kernel threads can check to see if they need to migrate their
+ * stack whenever they return from a context switch; for user
+ * threads, we defer until they are returning to user-space.
+ */
+#define finish_arch_switch(prev) do { \
+ if (unlikely((prev)->state == TASK_DEAD)) \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
+ ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
+ (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ if (current->mm == NULL && !kstack_hash && \
+ current_thread_info()->homecache_cpu != smp_processor_id()) \
+ homecache_migrate_kthread(); \
+} while (0)
+
+/* Support function for forking a new task. */
+void ret_from_fork(void);
+
+/* Support function for forking a new kernel thread. */
+void ret_from_kernel_thread(void *fn, void *arg);
+
+/* Called from ret_from_xxx() when a new process starts up. */
+struct task_struct *sim_notify_fork(struct task_struct *prev);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SWITCH_TO_H */
diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h
index d35e0dcb67b..9644b88f133 100644
--- a/arch/tile/include/asm/syscall.h
+++ b/arch/tile/include/asm/syscall.h
@@ -22,6 +22,12 @@
#include <linux/err.h>
#include <arch/abi.h>
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+#ifdef CONFIG_COMPAT
+extern void *compat_sys_call_table[];
+#endif
+
/*
* Only the low 32 bits of orig_r0 are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 3b5507c31ea..07b298450ef 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -24,12 +24,6 @@
#include <linux/types.h>
#include <linux/compat.h>
-/* The array of function pointers for syscalls. */
-extern void *sys_call_table[];
-#ifdef CONFIG_COMPAT
-extern void *compat_sys_call_table[];
-#endif
-
/*
* Note that by convention, any syscall which requires the current
* register set takes an additional "struct pt_regs *" pointer; a
@@ -43,15 +37,15 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
u32 len, int advice);
int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
u32 len_lo, u32 len_hi, int advice);
-long sys_flush_cache(void);
+long sys_cacheflush(unsigned long addr, unsigned long len,
+ unsigned long flags);
#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */
#define sys_mmap sys_mmap
#endif
#ifndef __tilegx__
/* mm/fault.c */
-long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
-long _sys_cmpxchg_badaddr(unsigned long address);
+long sys_cmpxchg_badaddr(unsigned long address);
#endif
#ifdef CONFIG_COMPAT
@@ -62,14 +56,14 @@ long sys_truncate64(const char __user *path, loff_t length);
long sys_ftruncate64(unsigned int fd, loff_t length);
#endif
+/* Provide versions of standard syscalls that use current_pt_regs(). */
+long sys_rt_sigreturn(void);
+#define sys_rt_sigreturn sys_rt_sigreturn
+
/* These are the intvec*.S trampolines. */
-long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
long _sys_rt_sigreturn(void);
long _sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid);
-long _sys_execve(const char __user *filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp);
#include <asm-generic/syscalls.h>
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
deleted file mode 100644
index 23d1842f483..00000000000
--- a/arch/tile/include/asm/system.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_SYSTEM_H
-#define _ASM_TILE_SYSTEM_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-#include <linux/irqflags.h>
-
-/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
-#include <asm/ptrace.h>
-
-#include <arch/chip.h>
-#include <arch/sim_def.h>
-#include <arch/spr_def.h>
-
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
-#define read_barrier_depends() do { } while (0)
-
-#define __sync() __insn_mf()
-
-#if CHIP_HAS_SPLIT_CYCLE()
-#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
-#else
-#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
-#endif
-
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-#include <hv/syscall_public.h>
-/*
- * Issue an uncacheable load to each memory controller, then
- * wait until those loads have completed.
- */
-static inline void __mb_incoherent(void)
-{
- long clobber_r10;
- asm volatile("swint2"
- : "=R10" (clobber_r10)
- : "R10" (HV_SYS_fence_incoherent)
- : "r0", "r1", "r2", "r3", "r4",
- "r5", "r6", "r7", "r8", "r9",
- "r11", "r12", "r13", "r14",
- "r15", "r16", "r17", "r18", "r19",
- "r20", "r21", "r22", "r23", "r24",
- "r25", "r26", "r27", "r28", "r29");
-}
-#endif
-
-/* Fence to guarantee visibility of stores to incoherent memory. */
-static inline void
-mb_incoherent(void)
-{
- __insn_mf();
-
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
- {
-#if CHIP_HAS_TILE_WRITE_PENDING()
- const unsigned long WRITE_TIMEOUT_CYCLES = 400;
- unsigned long start = get_cycles_low();
- do {
- if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
- return;
- } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
-#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
- (void) __mb_incoherent();
- }
-#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
-}
-
-#define fast_wmb() __sync()
-#define fast_rmb() __sync()
-#define fast_mb() __sync()
-#define fast_iob() mb_incoherent()
-
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
-#define mb() fast_mb()
-#define iob() fast_iob()
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
-#endif
-
-#define set_mb(var, value) \
- do { var = value; mb(); } while (0)
-
-/*
- * Pause the DMA engine and static network before task switching.
- */
-#define prepare_arch_switch(next) _prepare_arch_switch(next)
-void _prepare_arch_switch(struct task_struct *next);
-
-
-/*
- * switch_to(n) should switch tasks to task nr n, first
- * checking that n isn't the current task, in which case it does nothing.
- * The number of callee-saved registers saved on the kernel stack
- * is defined here for use in copy_thread() and must agree with __switch_to().
- */
-#endif /* !__ASSEMBLY__ */
-#define CALLEE_SAVED_FIRST_REG 30
-#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
-#ifndef __ASSEMBLY__
-struct task_struct;
-#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
-extern struct task_struct *_switch_to(struct task_struct *prev,
- struct task_struct *next);
-
-/* Helper function for _switch_to(). */
-extern struct task_struct *__switch_to(struct task_struct *prev,
- struct task_struct *next,
- unsigned long new_system_save_k_0);
-
-/* Address that switched-away from tasks are at. */
-extern unsigned long get_switch_to_pc(void);
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible:
- *
- * TODO: fill this in!
- */
-static inline void sched_cacheflush(void)
-{
-}
-
-#define arch_align_stack(x) (x)
-
-/*
- * Is the kernel doing fixups of unaligned accesses? If <0, no kernel
- * intervention occurs and SIGBUS is delivered with no data address
- * info. If 0, the kernel single-steps the instruction to discover
- * the data address to provide with the SIGBUS. If 1, the kernel does
- * a fixup.
- */
-extern int unaligned_fixup;
-
-/* Is the kernel printing on each unaligned fixup? */
-extern int unaligned_printk;
-
-/* Number of unaligned fixups performed */
-extern unsigned int unaligned_fixup_count;
-
-/* Init-time routine to do tile-specific per-cpu setup. */
-void setup_cpu(int boot);
-
-/* User-level DMA management functions */
-void grant_dma_mpls(void);
-void restrict_dma_mpls(void);
-
-#ifdef CONFIG_HARDWALL
-/* User-level network management functions */
-void reset_network_state(void);
-void grant_network_mpls(void);
-void restrict_network_mpls(void);
-int hardwall_deactivate(struct task_struct *task);
-
-/* Hook hardwall code into changes in affinity. */
-#define arch_set_cpus_allowed(p, new_mask) do { \
- if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \
- hardwall_deactivate(p); \
-} while (0)
-#endif
-
-/*
- * Kernel threads can check to see if they need to migrate their
- * stack whenever they return from a context switch; for user
- * threads, we defer until they are returning to user-space.
- */
-#define finish_arch_switch(prev) do { \
- if (unlikely((prev)->state == TASK_DEAD)) \
- __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
- ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
- __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
- (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
- if (current->mm == NULL && !kstack_hash && \
- current_thread_info()->homecache_cpu != smp_processor_id()) \
- homecache_migrate_kthread(); \
-} while (0)
-
-/* Support function for forking a new task. */
-void ret_from_fork(void);
-
-/* Called from ret_from_fork() when a new process starts up. */
-struct task_struct *sim_notify_fork(struct task_struct *prev);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_TILE_SYSTEM_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index bc4f562bd45..48e4fd0f38e 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -39,6 +39,11 @@ struct thread_info {
struct restart_block restart_block;
struct single_step_state *step_state; /* single step state
(if non-zero) */
+ int align_ctl; /* controls unaligned access */
+#ifdef __tilegx__
+ unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
+ void __user *unalign_jit_base; /* unalign fixup JIT base */
+#endif
};
/*
@@ -56,6 +61,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
.step_state = NULL, \
+ .align_ctl = 0, \
}
#define init_thread_info (init_thread_union.thread_info)
@@ -77,40 +83,36 @@ struct thread_info {
#ifndef __ASSEMBLY__
+void arch_release_thread_info(struct thread_info *info);
+
/* How to get the thread information struct from C. */
register unsigned long stack_pointer __asm__("sp");
#define current_thread_info() \
((struct thread_info *)(stack_pointer & -THREAD_SIZE))
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node);
-extern void free_thread_info(struct thread_info *info);
-
/* Sit on a nap instruction until interrupted. */
extern void smp_nap(void);
-/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
+/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). */
extern void _cpu_idle(void);
-/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
-extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
- unsigned long new_sp,
- unsigned long new_ss10);
-
#else /* __ASSEMBLY__ */
-/* how to get the thread information struct from ASM */
+/*
+ * How to get the thread information struct from assembly.
+ * Note that we use different macros since different architectures
+ * have different semantics in their "mm" instruction and we would
+ * like to guarantee that the macro expands to exactly one instruction.
+ */
#ifdef __tilegx__
-#define GET_THREAD_INFO(reg) move reg, sp; mm reg, zero, LOG2_THREAD_SIZE, 63
+#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63
#else
#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31
#endif
#endif /* !__ASSEMBLY__ */
-#define PREEMPT_ACTIVE 0x10000000
-
/*
* Thread information flags that various assembly files may need to access.
* Keep flags accessed frequently in low bits, particular since it makes
@@ -126,6 +128,8 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#define TIF_SECCOMP 6 /* secure computing */
#define TIF_MEMDIE 7 /* OOM killer at work */
#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
+#define TIF_SYSCALL_TRACEPOINT 9 /* syscall tracepoint instrumentation */
+#define TIF_POLLING_NRFLAG 10 /* idle is polling for TIF_NEED_RESCHED */
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
@@ -136,12 +140,20 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_MEMDIE (1<<TIF_MEMDIE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
/* Work to do on any return to user space. */
#define _TIF_ALLWORK_MASK \
(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
_TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+/* Work to do at syscall entry. */
+#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+
+/* Work to do at syscall exit. */
+#define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+
/*
* Thread-synchronous status.
*
@@ -152,18 +164,31 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#ifdef __tilegx__
#define TS_COMPAT 0x0001 /* 32-bit compatibility mode */
#endif
-#define TS_POLLING 0x0004 /* in idle loop but not sleeping */
#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */
-#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
-
#ifndef __ASSEMBLY__
#define HAVE_SET_RESTORE_SIGMASK 1
static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->status |= TS_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, &ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->status & TS_RESTORE_SIGMASK))
+ return false;
+ ti->status &= ~TS_RESTORE_SIGMASK;
+ return true;
}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
index 29921f0b86d..dc987d53e2a 100644
--- a/arch/tile/include/asm/timex.h
+++ b/arch/tile/include/asm/timex.h
@@ -29,11 +29,13 @@ typedef unsigned long long cycles_t;
#if CHIP_HAS_SPLIT_CYCLE()
cycles_t get_cycles(void);
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
#else
static inline cycles_t get_cycles(void)
{
return __insn_mfspr(SPR_CYCLE);
}
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
#endif
cycles_t get_clock_rate(void);
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb..dcf91b25a1e 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid);
/* The hypervisor tells us what ASIDs are available to us. */
extern int min_asid, max_asid;
-static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
-{
- return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
-}
-
/* Pass as vma pointer for non-executable mapping, if no vma available. */
-#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL)
/* Flush a single user page on this cpu. */
-static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr,
unsigned long page_size)
{
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
}
/* Flush range of user pages on this cpu. */
-static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_pages(struct vm_area_struct *vma,
unsigned long addr,
unsigned long page_size,
unsigned long len)
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void flush_tlb_current_task(void);
extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
-extern void flush_tlb_page_mm(const struct vm_area_struct *,
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(struct vm_area_struct *,
struct mm_struct *, unsigned long);
-extern void flush_tlb_range(const struct vm_area_struct *,
+extern void flush_tlb_range(struct vm_area_struct *,
unsigned long start, unsigned long end);
#define flush_tlb() flush_tlb_current_task()
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 6fdd0c86019..93831184423 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -44,66 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
/* For now, use numa node -1 for global allocation. */
#define pcibus_to_node(bus) ((void)(bus), -1)
-/*
- * TILE architecture has many cores integrated in one processor, so we need
- * setup bigger balance_interval for both CPU/NODE scheduling domains to
- * reduce process scheduling costs.
- */
-
-/* sched_domains SD_CPU_INIT for TILE architecture */
-#define SD_CPU_INIT (struct sched_domain) { \
- .min_interval = 4, \
- .max_interval = 128, \
- .busy_factor = 64, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 0*SD_SERIALIZE \
- , \
- .last_balance = jiffies, \
- .balance_interval = 32, \
-}
-
-/* sched_domains SD_NODE_INIT for TILE architecture */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 16, \
- .max_interval = 512, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 3, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
- .wake_idx = 1, \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 1*SD_SERIALIZE \
- , \
- .last_balance = jiffies, \
- .balance_interval = 128, \
-}
-
/* By definition, we create nodes based on online memory. */
#define node_has_online_mem(nid) 1
@@ -116,9 +56,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define topology_core_id(cpu) (cpu)
#define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask)
#define topology_thread_cpumask(cpu) cpumask_of(cpu)
-
-/* indicates that pointers to the topology struct cpumask maps are valid */
-#define arch_provides_topology_pointers yes
#endif
#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f20f920f93..4b99a1c3aab 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -15,12 +15,13 @@
#ifndef _ASM_TILE_TRAPS_H
#define _ASM_TILE_TRAPS_H
+#ifndef __ASSEMBLY__
#include <arch/chip.h>
/* mm/fault.c */
void do_page_fault(struct pt_regs *, int fault_num,
unsigned long address, unsigned long write);
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
void do_async_page_fault(struct pt_regs *);
#endif
@@ -64,7 +65,21 @@ void do_breakpoint(struct pt_regs *, int fault_num);
#ifdef __tilegx__
+/* kernel/single_step.c */
void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
+
+/* Handle unalign data fixup. */
+extern void do_unaligned(struct pt_regs *regs, int vecnum);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef __tilegx__
+/* 128 byte JIT per unalign fixup. */
+#define UNALIGN_JIT_SHIFT 7
#endif
-#endif /* _ASM_TILE_SYSCALLS_H */
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ef34d2caa5b..b6cde3209b9 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -114,45 +114,79 @@ struct exception_table_entry {
extern int fixup_exception(struct pt_regs *regs);
/*
- * We return the __get_user_N function results in a structure,
- * thus in r0 and r1. If "err" is zero, "val" is the result
- * of the read; otherwise, "err" is -EFAULT.
- *
- * We rarely need 8-byte values on a 32-bit architecture, but
- * we size the structure to accommodate. In practice, for the
- * the smaller reads, we can zero the high word for free, and
- * the caller will ignore it by virtue of casting anyway.
+ * Support macros for __get_user().
+ *
+ * Implementation note: The "case 8" logic of casting to the type of
+ * the result of subtracting the value from itself is basically a way
+ * of keeping all integer types the same, but casting any pointers to
+ * ptrdiff_t, i.e. also an integer type. This way there are no
+ * questionable casts seen by the compiler on an ILP32 platform.
+ *
+ * Note that __get_user() and __put_user() assume proper alignment.
*/
-struct __get_user {
- unsigned long long val;
- int err;
-};
-/*
- * FIXME: we should express these as inline extended assembler, since
- * they're fundamentally just a variable dereference and some
- * supporting exception_table gunk. Note that (a la i386) we can
- * extend the copy_to_user and copy_from_user routines to call into
- * such extended assembler routines, though we will have to use a
- * different return code in that case (1, 2, or 4, rather than -EFAULT).
- */
-extern struct __get_user __get_user_1(const void __user *);
-extern struct __get_user __get_user_2(const void __user *);
-extern struct __get_user __get_user_4(const void __user *);
-extern struct __get_user __get_user_8(const void __user *);
-extern int __put_user_1(long, void __user *);
-extern int __put_user_2(long, void __user *);
-extern int __put_user_4(long, void __user *);
-extern int __put_user_8(long long, void __user *);
-
-/* Unimplemented routines to cause linker failures */
-extern struct __get_user __get_user_bad(void);
-extern int __put_user_bad(void);
+#ifdef __LP64__
+#define _ASM_PTR ".quad"
+#define _ASM_ALIGN ".align 8"
+#else
+#define _ASM_PTR ".long"
+#define _ASM_ALIGN ".align 4"
+#endif
+
+#define __get_user_asm(OP, x, ptr, ret) \
+ asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %1, 0; movei %0, %3 }\n" \
+ "j 9f\n" \
+ ".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (x) \
+ : "r" (ptr), "i" (-EFAULT))
+
+#ifdef __tilegx__
+#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret)
+#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret)
+#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret)
+#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret)
+#else
+#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret)
+#define __get_user_2(x, ptr, ret) __get_user_asm(lh_u, x, ptr, ret)
+#define __get_user_4(x, ptr, ret) __get_user_asm(lw, x, ptr, ret)
+#ifdef __LITTLE_ENDIAN
+#define __lo32(a, b) a
+#define __hi32(a, b) b
+#else
+#define __lo32(a, b) b
+#define __hi32(a, b) a
+#endif
+#define __get_user_8(x, ptr, ret) \
+ ({ \
+ unsigned int __a, __b; \
+ asm volatile("1: { lw %1, %3; addi %2, %3, 4 }\n" \
+ "2: { lw %2, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %1, 0; movei %2, 0 }\n" \
+ "{ movei %0, %4; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".word 1b, 0b\n" \
+ ".word 2b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (__a), "=&r" (__b) \
+ : "r" (ptr), "i" (-EFAULT)); \
+ (x) = (__typeof(x))(__typeof((x)-(x))) \
+ (((u64)__hi32(__a, __b) << 32) | \
+ __lo32(__a, __b)); \
+ })
+#endif
+
+extern int __get_user_bad(void)
+ __attribute__((warning("sizeof __get_user argument not 1, 2, 4 or 8")));
-/*
- * Careful: we have to cast the result to the type of the pointer
- * for sign reasons.
- */
/**
* __get_user: - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -174,30 +208,64 @@ extern int __put_user_bad(void);
* function.
*/
#define __get_user(x, ptr) \
-({ struct __get_user __ret; \
- __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \
- __chk_user_ptr(__gu_addr); \
- switch (sizeof(*(__gu_addr))) { \
- case 1: \
- __ret = __get_user_1(__gu_addr); \
- break; \
- case 2: \
- __ret = __get_user_2(__gu_addr); \
- break; \
- case 4: \
- __ret = __get_user_4(__gu_addr); \
- break; \
- case 8: \
- __ret = __get_user_8(__gu_addr); \
- break; \
- default: \
- __ret = __get_user_bad(); \
- break; \
- } \
- (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \
- __ret.val; \
- __ret.err; \
-})
+ ({ \
+ int __ret; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: __get_user_1(x, ptr, __ret); break; \
+ case 2: __get_user_2(x, ptr, __ret); break; \
+ case 4: __get_user_4(x, ptr, __ret); break; \
+ case 8: __get_user_8(x, ptr, __ret); break; \
+ default: __ret = __get_user_bad(); break; \
+ } \
+ __ret; \
+ })
+
+/* Support macros for __put_user(). */
+
+#define __put_user_asm(OP, x, ptr, ret) \
+ asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %3; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret) \
+ : "r" (ptr), "r" (x), "i" (-EFAULT))
+
+#ifdef __tilegx__
+#define __put_user_1(x, ptr, ret) __put_user_asm(st1, x, ptr, ret)
+#define __put_user_2(x, ptr, ret) __put_user_asm(st2, x, ptr, ret)
+#define __put_user_4(x, ptr, ret) __put_user_asm(st4, x, ptr, ret)
+#define __put_user_8(x, ptr, ret) __put_user_asm(st, x, ptr, ret)
+#else
+#define __put_user_1(x, ptr, ret) __put_user_asm(sb, x, ptr, ret)
+#define __put_user_2(x, ptr, ret) __put_user_asm(sh, x, ptr, ret)
+#define __put_user_4(x, ptr, ret) __put_user_asm(sw, x, ptr, ret)
+#define __put_user_8(x, ptr, ret) \
+ ({ \
+ u64 __x = (__typeof((x)-(x)))(x); \
+ int __lo = (int) __x, __hi = (int) (__x >> 32); \
+ asm volatile("1: { sw %1, %2; addi %0, %1, 4 }\n" \
+ "2: { sw %0, %3; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %4; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".word 1b, 0b\n" \
+ ".word 2b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=&r" (ret) \
+ : "r" (ptr), "r" (__lo32(__lo, __hi)), \
+ "r" (__hi32(__lo, __hi)), "i" (-EFAULT)); \
+ })
+#endif
+
+extern int __put_user_bad(void)
+ __attribute__((warning("sizeof __put_user argument not 1, 2, 4 or 8")));
/**
* __put_user: - Write a simple value into user space, with less checking.
@@ -217,39 +285,19 @@ extern int __put_user_bad(void);
* function.
*
* Returns zero on success, or -EFAULT on error.
- *
- * Implementation note: The "case 8" logic of casting to the type of
- * the result of subtracting the value from itself is basically a way
- * of keeping all integer types the same, but casting any pointers to
- * ptrdiff_t, i.e. also an integer type. This way there are no
- * questionable casts seen by the compiler on an ILP32 platform.
*/
#define __put_user(x, ptr) \
({ \
- int __pu_err = 0; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- typeof(*__pu_addr) __pu_val = (x); \
- __chk_user_ptr(__pu_addr); \
- switch (sizeof(__pu_val)) { \
- case 1: \
- __pu_err = __put_user_1((long)__pu_val, __pu_addr); \
- break; \
- case 2: \
- __pu_err = __put_user_2((long)__pu_val, __pu_addr); \
- break; \
- case 4: \
- __pu_err = __put_user_4((long)__pu_val, __pu_addr); \
- break; \
- case 8: \
- __pu_err = \
- __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
- __pu_addr); \
- break; \
- default: \
- __pu_err = __put_user_bad(); \
- break; \
+ int __ret; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr, __ret); break; \
+ case 2: __put_user_2(x, ptr, __ret); break; \
+ case 4: __put_user_4(x, ptr, __ret); break; \
+ case 8: __put_user_8(x, ptr, __ret); break; \
+ default: __ret = __put_user_bad(); break; \
} \
- __pu_err; \
+ __ret; \
})
/*
@@ -353,7 +401,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
return n;
}
-#ifdef CONFIG_DEBUG_COPY_FROM_USER
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+/*
+ * There are still unprovable places in the generic code as of 2.6.34, so this
+ * option is not really compatible with -Werror, which is more useful in
+ * general.
+ */
extern void copy_from_user_overflow(void)
__compiletime_warning("copy_from_user() size is not provably correct");
@@ -378,7 +431,7 @@ static inline unsigned long __must_check copy_from_user(void *to,
/**
* __copy_in_user() - copy data within user space, with less checking.
* @to: Destination address, in user space.
- * @from: Source address, in kernel space.
+ * @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep.
@@ -395,7 +448,7 @@ extern unsigned long __copy_in_user_inatomic(
static inline unsigned long __must_check
__copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
return __copy_in_user_inatomic(to, from, n);
}
@@ -520,37 +573,6 @@ static inline unsigned long __must_check flush_user(
}
/**
- * inv_user: - Invalidate a block of memory in user space from cache.
- * @mem: Destination address, in user space.
- * @len: Number of bytes to invalidate.
- *
- * Returns number of bytes that could not be invalidated.
- * On success, this will be zero.
- *
- * Note that on Tile64, the "inv" operation is in fact a
- * "flush and invalidate", so cache write-backs will occur prior
- * to the cache being marked invalid.
- */
-extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
-static inline unsigned long __must_check __inv_user(
- void __user *mem, unsigned long len)
-{
- int retval;
-
- might_fault();
- retval = inv_user_asm(mem, len);
- mb_incoherent();
- return retval;
-}
-static inline unsigned long __must_check inv_user(
- void __user *mem, unsigned long len)
-{
- if (access_ok(VERIFY_WRITE, mem, len))
- return __inv_user(mem, len);
- return len;
-}
-
-/**
* finv_user: - Flush-inval a block of memory in user space from cache.
* @mem: Destination address, in user space.
* @len: Number of bytes to invalidate.
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
index 137e2de5b10..5a58a0d1144 100644
--- a/arch/tile/include/asm/unaligned.h
+++ b/arch/tile/include/asm/unaligned.h
@@ -15,10 +15,29 @@
#ifndef _ASM_TILE_UNALIGNED_H
#define _ASM_TILE_UNALIGNED_H
-#include <linux/unaligned/le_struct.h>
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/generic.h>
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
+/*
+ * We could implement faster get_unaligned_[be/le]64 using the ldna
+ * instruction on tilegx; however, we need to either copy all of the
+ * other generic functions to here (which is pretty ugly) or else
+ * modify both the generic code and other arch code to allow arch
+ * specific unaligned data access functions. Given these functions
+ * are not often called, we'll stick with the generic version.
+ */
+#include <asm-generic/unaligned.h>
+
+/*
+ * Is the kernel doing fixups of unaligned accesses? If <0, no kernel
+ * intervention occurs and SIGBUS is delivered with no data address
+ * info. If 0, the kernel single-steps the instruction to discover
+ * the data address to provide with the SIGBUS. If 1, the kernel does
+ * a fixup.
+ */
+extern int unaligned_fixup;
+
+/* Is the kernel printing on each unaligned fixup? */
+extern int unaligned_printk;
+
+/* Number of unaligned fixups performed */
+extern unsigned int unaligned_fixup_count;
#endif /* _ASM_TILE_UNALIGNED_H */
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index f70bf1c541f..940831fe9e9 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -11,37 +11,10 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
-#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL)
-#define _ASM_TILE_UNISTD_H
-
-#if !defined(__LP64__) || defined(__SYSCALL_COMPAT)
-/* Use the flavor of this syscall that matches the 32-bit API better. */
-#define __ARCH_WANT_SYNC_FILE_RANGE2
-#endif
-
-/* Use the standard ABI for syscalls. */
-#include <asm-generic/unistd.h>
-
-/* Additional Tilera-specific syscalls. */
-#define __NR_flush_cache (__NR_arch_specific_syscall + 1)
-__SYSCALL(__NR_flush_cache, sys_flush_cache)
-
-#ifndef __tilegx__
-/* "Fast" syscalls provide atomic support for 32-bit chips. */
-#define __NR_FAST_cmpxchg -1
-#define __NR_FAST_atomic_update -2
-#define __NR_FAST_cmpxchg64 -3
-#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0)
-__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
-#endif
-
-#ifdef __KERNEL__
/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
#ifdef CONFIG_COMPAT
#define __ARCH_WANT_SYS_LLSEEK
#endif
#define __ARCH_WANT_SYS_NEWFSTATAT
-#endif
-
-#endif /* _ASM_TILE_UNISTD_H */
+#define __ARCH_WANT_SYS_CLONE
+#include <uapi/asm/unistd.h>
diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
new file mode 100644
index 00000000000..9f6a78d665f
--- /dev/null
+++ b/arch/tile/include/asm/vdso.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __TILE_VDSO_H__
+#define __TILE_VDSO_H__
+
+#include <linux/types.h>
+
+/*
+ * Note about the vdso_data structure:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+ __u64 tz_update_count; /* Timezone atomicity ctr */
+ __u64 tb_update_count; /* Timebase atomicity ctr */
+ __u64 xtime_tod_stamp; /* TOD clock for xtime */
+ __u64 xtime_clock_sec; /* Kernel time second */
+ __u64 xtime_clock_nsec; /* Kernel time nanosecond */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock second */
+ __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */
+ __u32 mult; /* Cycle to nanosecond multiplier */
+ __u32 shift; /* Cycle to nanosecond divisor (power of two) */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich */
+ __u32 tz_dsttime; /* Type of dst correction */
+};
+
+extern struct vdso_data *vdso_data;
+
+/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */
+extern void __vdso_rt_sigreturn(void);
+
+extern int setup_vdso_pages(void);
+
+#endif /* __TILE_VDSO_H__ */
diff --git a/arch/tile/include/gxio/common.h b/arch/tile/include/gxio/common.h
new file mode 100644
index 00000000000..724595a24d0
--- /dev/null
+++ b/arch/tile/include/gxio/common.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GXIO_COMMON_H_
+#define _GXIO_COMMON_H_
+
+/*
+ * Routines shared between the various GXIO device components.
+ */
+
+#include <hv/iorpc.h>
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/io.h>
+
+/* Define the standard gxio MMIO functions using kernel functions. */
+#define __gxio_mmio_read8(addr) readb(addr)
+#define __gxio_mmio_read16(addr) readw(addr)
+#define __gxio_mmio_read32(addr) readl(addr)
+#define __gxio_mmio_read64(addr) readq(addr)
+#define __gxio_mmio_write8(addr, val) writeb((val), (addr))
+#define __gxio_mmio_write16(addr, val) writew((val), (addr))
+#define __gxio_mmio_write32(addr, val) writel((val), (addr))
+#define __gxio_mmio_write64(addr, val) writeq((val), (addr))
+#define __gxio_mmio_read(addr) __gxio_mmio_read64(addr)
+#define __gxio_mmio_write(addr, val) __gxio_mmio_write64((addr), (val))
+
+#endif /* !_GXIO_COMMON_H_ */
diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h
new file mode 100644
index 00000000000..b9e45e37649
--- /dev/null
+++ b/arch/tile/include/gxio/dma_queue.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GXIO_DMA_QUEUE_H_
+#define _GXIO_DMA_QUEUE_H_
+
+/*
+ * DMA queue management APIs shared between TRIO and mPIPE.
+ */
+
+#include <gxio/common.h>
+
+/* The credit counter lives in the high 32 bits. */
+#define DMA_QUEUE_CREDIT_SHIFT 32
+
+/*
+ * State object that tracks a DMA queue's head and tail indices, as
+ * well as the number of commands posted and completed. The
+ * structure is accessed via a thread-safe, lock-free algorithm.
+ */
+typedef struct {
+ /*
+ * Address of a MPIPE_EDMA_POST_REGION_VAL_t,
+ * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t
+ * register. These register have identical encodings and provide
+ * information about how many commands have been processed.
+ */
+ void *post_region_addr;
+
+ /*
+ * A lazily-updated count of how many edescs the hardware has
+ * completed.
+ */
+ uint64_t hw_complete_count __attribute__ ((aligned(64)));
+
+ /*
+ * High 32 bits are a count of available egress command credits,
+ * low 24 bits are the next egress "slot".
+ */
+ int64_t credits_and_next_index;
+
+} __gxio_dma_queue_t;
+
+/* Initialize a dma queue. */
+extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue,
+ void *post_region_addr,
+ unsigned int num_entries);
+
+/*
+ * Update the "credits_and_next_index" and "hw_complete_count" fields
+ * based on pending hardware completions. Note that some other thread
+ * may have already done this and, importantly, may still be in the
+ * process of updating "credits_and_next_index".
+ */
+extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue);
+
+/* Wait for credits to become available. */
+extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue,
+ int64_t modifier);
+
+/* Reserve slots in the queue, optionally waiting for slots to become
+ * available, and optionally returning a "completion_slot" suitable for
+ * direct comparison to "hw_complete_count".
+ */
+static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue,
+ unsigned int num, bool wait,
+ bool completion)
+{
+ uint64_t slot;
+
+ /*
+ * Try to reserve 'num' egress command slots. We do this by
+ * constructing a constant that subtracts N credits and adds N to
+ * the index, and using fetchaddgez to only apply it if the credits
+ * count doesn't go negative.
+ */
+ int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num;
+ int64_t old =
+ __insn_fetchaddgez(&dma_queue->credits_and_next_index,
+ modifier);
+
+ if (unlikely(old + modifier < 0)) {
+ /*
+ * We're out of credits. Try once to get more by checking for
+ * completed egress commands. If that fails, wait or fail.
+ */
+ __gxio_dma_queue_update_credits(dma_queue);
+ old = __insn_fetchaddgez(&dma_queue->credits_and_next_index,
+ modifier);
+ if (old + modifier < 0) {
+ if (wait)
+ old = __gxio_dma_queue_wait_for_credits
+ (dma_queue, modifier);
+ else
+ return GXIO_ERR_DMA_CREDITS;
+ }
+ }
+
+ /* The bottom 24 bits of old encode the "slot". */
+ slot = (old & 0xffffff);
+
+ if (completion) {
+ /*
+ * A "completion_slot" is a "slot" which can be compared to
+ * "hw_complete_count" at any time in the future. To convert
+ * "slot" into a "completion_slot", we access "hw_complete_count"
+ * once (knowing that we have reserved a slot, and thus, it will
+ * be "basically" accurate), and combine its high 40 bits with
+ * the 24 bit "slot", and handle "wrapping" by adding "1 << 24"
+ * if the result is LESS than "hw_complete_count".
+ */
+ uint64_t complete;
+ complete = ACCESS_ONCE(dma_queue->hw_complete_count);
+ slot |= (complete & 0xffffffffff000000);
+ if (slot < complete)
+ slot += 0x1000000;
+ }
+
+ /*
+ * If any of our slots mod 256 were equivalent to 0, go ahead and
+ * collect some egress credits, and update "hw_complete_count", and
+ * make sure the index doesn't overflow into the credits.
+ */
+ if (unlikely(((old + num) & 0xff) < num)) {
+ __gxio_dma_queue_update_credits(dma_queue);
+
+ /* Make sure the index doesn't overflow into the credits. */
+#ifdef __BIG_ENDIAN__
+ *(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0;
+#else
+ *(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0;
+#endif
+ }
+
+ return slot;
+}
+
+/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */
+extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue,
+ unsigned int num, int wait);
+
+/* Check whether a particular "completion slot" has completed.
+ *
+ * Note that this function requires a "completion slot", and thus
+ * cannot be used with the result of any "reserve_fast" function.
+ */
+extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue,
+ int64_t completion_slot, int update);
+
+#endif /* !_GXIO_DMA_QUEUE_H_ */
diff --git a/arch/tile/include/gxio/iorpc_globals.h b/arch/tile/include/gxio/iorpc_globals.h
new file mode 100644
index 00000000000..52c721f8dad
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_globals.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __IORPC_LINUX_RPC_H__
+#define __IORPC_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define IORPC_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000)
+#define IORPC_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001)
+#define IORPC_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define IORPC_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int __iorpc_arm_pollfd(int fd, int pollfd_cookie);
+
+int __iorpc_close_pollfd(int fd, int pollfd_cookie);
+
+int __iorpc_get_mmio_base(int fd, HV_PTE *base);
+
+int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size);
+
+#endif /* !__IORPC_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h
new file mode 100644
index 00000000000..4cda03de734
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_mpipe.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_MPIPE_LINUX_RPC_H__
+#define __GXIO_MPIPE_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_mpipe_intf.h>
+#include <asm/page.h>
+#include <gxio/kiorpc.h>
+#include <gxio/mpipe.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1200)
+#define GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1201)
+
+#define GXIO_MPIPE_OP_ALLOC_NOTIF_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1203)
+#define GXIO_MPIPE_OP_INIT_NOTIF_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1204)
+#define GXIO_MPIPE_OP_REQUEST_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1205)
+#define GXIO_MPIPE_OP_ENABLE_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1206)
+#define GXIO_MPIPE_OP_ALLOC_NOTIF_GROUPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1207)
+#define GXIO_MPIPE_OP_INIT_NOTIF_GROUP IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1208)
+#define GXIO_MPIPE_OP_ALLOC_BUCKETS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1209)
+#define GXIO_MPIPE_OP_INIT_BUCKET IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120a)
+#define GXIO_MPIPE_OP_ALLOC_EDMA_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120b)
+#define GXIO_MPIPE_OP_INIT_EDMA_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x120c)
+
+#define GXIO_MPIPE_OP_COMMIT_RULES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120f)
+#define GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1210)
+#define GXIO_MPIPE_OP_LINK_OPEN_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1211)
+#define GXIO_MPIPE_OP_LINK_CLOSE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1212)
+#define GXIO_MPIPE_OP_LINK_SET_ATTR_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1213)
+
+#define GXIO_MPIPE_OP_GET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121e)
+#define GXIO_MPIPE_OP_SET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121f)
+#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1220)
+#define GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1221)
+#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1222)
+#define GXIO_MPIPE_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000)
+#define GXIO_MPIPE_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001)
+#define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
+ void *mem_va, size_t mem_size,
+ unsigned int mem_flags, unsigned int stack,
+ unsigned int buffer_size_enum);
+
+
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
+ size_t mem_size, unsigned int mem_flags,
+ unsigned int ring);
+
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
+ int inter_x, int inter_y,
+ int inter_ipi, int inter_event,
+ unsigned int ring);
+
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
+ unsigned int ring);
+
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
+ unsigned int group,
+ gxio_mpipe_notif_group_bits_t bits);
+
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
+ unsigned int first, unsigned int flags);
+
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
+ MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info);
+
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
+ size_t mem_size, unsigned int mem_flags,
+ unsigned int ring, unsigned int channel);
+
+
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
+ size_t blob_size);
+
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
+ unsigned int iotlb, HV_PTE pte,
+ unsigned int flags);
+
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
+ _gxio_mpipe_link_name_t name, unsigned int flags);
+
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac);
+
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
+ uint32_t attr, int64_t val);
+
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+ uint64_t *nsec, uint64_t *cycles);
+
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
+ uint64_t nsec, uint64_t cycles);
+
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context,
+ int64_t nsec);
+
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context,
+ int32_t ppb);
+
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
+
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
+
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base);
+
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h
new file mode 100644
index 00000000000..f0b04284468
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_mpipe_info.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_MPIPE_INFO_LINUX_RPC_H__
+#define __GXIO_MPIPE_INFO_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_mpipe_intf.h>
+#include <asm/page.h>
+#include <gxio/kiorpc.h>
+#include <gxio/mpipe.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+
+#define GXIO_MPIPE_INFO_OP_INSTANCE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1250)
+#define GXIO_MPIPE_INFO_OP_ENUMERATE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1251)
+#define GXIO_MPIPE_INFO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
+ _gxio_mpipe_link_name_t name);
+
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
+ unsigned int idx,
+ _gxio_mpipe_link_name_t *name,
+ _gxio_mpipe_link_mac_t *mac);
+
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
+ HV_PTE *base);
+
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h
new file mode 100644
index 00000000000..376a4f77116
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_trio.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_TRIO_LINUX_RPC_H__
+#define __GXIO_TRIO_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_trio_intf.h>
+#include <gxio/trio.h>
+#include <gxio/kiorpc.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400)
+#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401)
+
+#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404)
+
+#define GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e)
+#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412)
+
+#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414)
+
+#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e)
+#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f)
+#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420)
+#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421)
+
+#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423)
+#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424)
+#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425)
+#define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
+ unsigned int first, unsigned int flags);
+
+
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
+ unsigned int pio_region, unsigned int mac,
+ uint32_t bus_address_hi, unsigned int flags);
+
+
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
+ unsigned int map, unsigned long va,
+ uint64_t size, unsigned int asid,
+ unsigned int mac, uint64_t bus_address,
+ unsigned int node,
+ unsigned int order_mode);
+
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
+ struct pcie_trio_ports_property *trio_ports);
+
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event,
+ unsigned int mac, unsigned int intx);
+
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event,
+ unsigned int mac, unsigned int mem_map,
+ uint64_t mem_map_base, uint64_t mem_map_limit,
+ unsigned int asid);
+
+
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
+ uint16_t mrs, unsigned int mac);
+
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac);
+
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac);
+
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base);
+
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_TRIO_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_uart.h b/arch/tile/include/gxio/iorpc_uart.h
new file mode 100644
index 00000000000..55429d48ea5
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_uart.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_UART_LINUX_RPC_H__
+#define __GXIO_UART_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_uart_intf.h>
+#include <gxio/uart.h>
+#include <gxio/kiorpc.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define GXIO_UART_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1900)
+#define GXIO_UART_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_UART_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event);
+
+int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base);
+
+int gxio_uart_check_mmio_offset(gxio_uart_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_UART_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h
new file mode 100644
index 00000000000..79962a97de8
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_usb_host.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_USB_HOST_LINUX_RPC_H__
+#define __GXIO_USB_HOST_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_usb_host_intf.h>
+#include <asm/page.h>
+#include <gxio/kiorpc.h>
+#include <gxio/usb_host.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define GXIO_USB_HOST_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1800)
+#define GXIO_USB_HOST_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1801)
+#define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event);
+
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
+ HV_PTE pte, unsigned int flags);
+
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context,
+ HV_PTE *base);
+
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/kiorpc.h b/arch/tile/include/gxio/kiorpc.h
new file mode 100644
index 00000000000..ee5820979ff
--- /dev/null
+++ b/arch/tile/include/gxio/kiorpc.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Support routines for kernel IORPC drivers.
+ */
+
+#ifndef _GXIO_KIORPC_H
+#define _GXIO_KIORPC_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+#include <arch/chip.h>
+
+#if CHIP_HAS_MMIO()
+void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset,
+ unsigned long size);
+#endif
+
+#endif /* _GXIO_KIORPC_H */
diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h
new file mode 100644
index 00000000000..e37cf4f0cff
--- /dev/null
+++ b/arch/tile/include/gxio/mpipe.h
@@ -0,0 +1,1871 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GXIO_MPIPE_H_
+#define _GXIO_MPIPE_H_
+
+/*
+ *
+ * An API for allocating, configuring, and manipulating mPIPE hardware
+ * resources.
+ */
+
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
+
+#include <linux/time.h>
+
+#include <arch/mpipe_def.h>
+#include <arch/mpipe_shm.h>
+
+#include <hv/drv_mpipe_intf.h>
+#include <hv/iorpc.h>
+
+/*
+ *
+ * The TILE-Gx mPIPE&tm; shim provides Ethernet connectivity, packet
+ * classification, and packet load balancing services. The
+ * gxio_mpipe_ API, declared in <gxio/mpipe.h>, allows applications to
+ * allocate mPIPE IO channels, configure packet distribution
+ * parameters, and send and receive Ethernet packets. The API is
+ * designed to be a minimal wrapper around the mPIPE hardware, making
+ * system calls only where necessary to preserve inter-process
+ * protection guarantees.
+ *
+ * The APIs described below allow the programmer to allocate and
+ * configure mPIPE resources. As described below, the mPIPE is a
+ * single shared hardware device that provides partitionable resources
+ * that are shared between all applications in the system. The
+ * gxio_mpipe_ API allows userspace code to make resource request
+ * calls to the hypervisor, which in turns keeps track of the
+ * resources in use by all applications, maintains protection
+ * guarantees, and resets resources upon application shutdown.
+ *
+ * We strongly recommend reading the mPIPE section of the IO Device
+ * Guide (UG404) before working with this API. Most functions in the
+ * gxio_mpipe_ API are directly analogous to hardware interfaces and
+ * the documentation assumes that the reader understands those
+ * hardware interfaces.
+ *
+ * @section mpipe__ingress mPIPE Ingress Hardware Resources
+ *
+ * The mPIPE ingress hardware provides extensive hardware offload for
+ * tasks like packet header parsing, load balancing, and memory
+ * management. This section provides a brief introduction to the
+ * hardware components and the gxio_mpipe_ calls used to manage them;
+ * see the IO Device Guide for a much more detailed description of the
+ * mPIPE's capabilities.
+ *
+ * When a packet arrives at one of the mPIPE's Ethernet MACs, it is
+ * assigned a channel number indicating which MAC received it. It
+ * then proceeds through the following hardware pipeline:
+ *
+ * @subsection mpipe__classification Classification
+ *
+ * A set of classification processors run header parsing code on each
+ * incoming packet, extracting information including the destination
+ * MAC address, VLAN, Ethernet type, and five-tuple hash. Some of
+ * this information is then used to choose which buffer stack will be
+ * used to hold the packet, and which bucket will be used by the load
+ * balancer to determine which application will receive the packet.
+ *
+ * The rules by which the buffer stack and bucket are chosen can be
+ * configured via the @ref gxio_mpipe_classifier API. A given app can
+ * specify multiple rules, each one specifying a bucket range, and a
+ * set of buffer stacks, to be used for packets matching the rule.
+ * Each rule can optionally specify a restricted set of channels,
+ * VLANs, and/or dMACs, in which it is interested. By default, a
+ * given rule starts out matching all channels associated with the
+ * mPIPE context's set of open links; all VLANs; and all dMACs.
+ * Subsequent restrictions can then be added.
+ *
+ * @subsection mpipe__load_balancing Load Balancing
+ *
+ * The mPIPE load balancer is responsible for choosing the NotifRing
+ * to which the packet will be delivered. This decision is based on
+ * the bucket number indicated by the classification program. In
+ * general, the bucket number is based on some number of low bits of
+ * the packet's flow hash (applications that aren't interested in flow
+ * hashing use a single bucket). Each load balancer bucket keeps a
+ * record of the NotifRing to which packets directed to that bucket
+ * are currently being delivered. Based on the bucket's load
+ * balancing mode (@ref gxio_mpipe_bucket_mode_t), the load balancer
+ * either forwards the packet to the previously assigned NotifRing or
+ * decides to choose a new NotifRing. If a new NotifRing is required,
+ * the load balancer chooses the least loaded ring in the NotifGroup
+ * associated with the bucket.
+ *
+ * The load balancer is a shared resource. Each application needs to
+ * explicitly allocate NotifRings, NotifGroups, and buckets, using
+ * gxio_mpipe_alloc_notif_rings(), gxio_mpipe_alloc_notif_groups(),
+ * and gxio_mpipe_alloc_buckets(). Then the application needs to
+ * configure them using gxio_mpipe_init_notif_ring() and
+ * gxio_mpipe_init_notif_group_and_buckets().
+ *
+ * @subsection mpipe__buffers Buffer Selection and Packet Delivery
+ *
+ * Once the load balancer has chosen the destination NotifRing, the
+ * mPIPE DMA engine pops at least one buffer off of the 'buffer stack'
+ * chosen by the classification program and DMAs the packet data into
+ * that buffer. Each buffer stack provides a hardware-accelerated
+ * stack of data buffers with the same size. If the packet data is
+ * larger than the buffers provided by the chosen buffer stack, the
+ * mPIPE hardware pops off multiple buffers and chains the packet data
+ * through a multi-buffer linked list. Once the packet data is
+ * delivered to the buffer(s), the mPIPE hardware writes the
+ * ::gxio_mpipe_idesc_t metadata object (calculated by the classifier)
+ * into the NotifRing and increments the number of packets delivered
+ * to that ring.
+ *
+ * Applications can push buffers onto a buffer stack by calling
+ * gxio_mpipe_push_buffer() or by egressing a packet with the
+ * ::gxio_mpipe_edesc_t::hwb bit set, indicating that the egressed
+ * buffers should be returned to the stack.
+ *
+ * Applications can allocate and initialize buffer stacks with the
+ * gxio_mpipe_alloc_buffer_stacks() and gxio_mpipe_init_buffer_stack()
+ * APIs.
+ *
+ * The application must also register the memory pages that will hold
+ * packets. This requires calling gxio_mpipe_register_page() for each
+ * memory page that will hold packets allocated by the application for
+ * a given buffer stack. Since each buffer stack is limited to 16
+ * registered pages, it may be necessary to use huge pages, or even
+ * extremely huge pages, to hold all the buffers.
+ *
+ * @subsection mpipe__iqueue NotifRings
+ *
+ * Each NotifRing is a region of shared memory, allocated by the
+ * application, to which the mPIPE delivers packet descriptors
+ * (::gxio_mpipe_idesc_t). The application can allocate them via
+ * gxio_mpipe_alloc_notif_rings(). The application can then either
+ * explicitly initialize them with gxio_mpipe_init_notif_ring() and
+ * then read from them manually, or can make use of the convenience
+ * wrappers provided by @ref gxio_mpipe_wrappers.
+ *
+ * @section mpipe__egress mPIPE Egress Hardware
+ *
+ * Applications use eDMA rings to queue packets for egress. The
+ * application can allocate them via gxio_mpipe_alloc_edma_rings().
+ * The application can then either explicitly initialize them with
+ * gxio_mpipe_init_edma_ring() and then write to them manually, or
+ * can make use of the convenience wrappers provided by
+ * @ref gxio_mpipe_wrappers.
+ *
+ * @section gxio__shortcomings Plans for Future API Revisions
+ *
+ * The API defined here is only an initial version of the mPIPE API.
+ * Future plans include:
+ *
+ * - Higher level wrapper functions to provide common initialization
+ * patterns. This should help users start writing mPIPE programs
+ * without having to learn the details of the hardware.
+ *
+ * - Support for reset and deallocation of resources, including
+ * cleanup upon application shutdown.
+ *
+ * - Support for calling these APIs in the BME.
+ *
+ * - Support for IO interrupts.
+ *
+ * - Clearer definitions of thread safety guarantees.
+ *
+ * @section gxio__mpipe_examples Examples
+ *
+ * See the following mPIPE example programs for more information about
+ * allocating mPIPE resources and using them in real applications:
+ *
+ * - @ref mpipe/ingress/app.c : Receiving packets.
+ *
+ * - @ref mpipe/forward/app.c : Forwarding packets.
+ *
+ * Note that there are several more examples.
+ */
+
+/* Flags that can be passed to resource allocation functions. */
+enum gxio_mpipe_alloc_flags_e {
+ /* Require an allocation to start at a specified resource index. */
+ GXIO_MPIPE_ALLOC_FIXED = HV_MPIPE_ALLOC_FIXED,
+};
+
+/* Flags that can be passed to memory registration functions. */
+enum gxio_mpipe_mem_flags_e {
+ /* Do not fill L3 when writing, and invalidate lines upon egress. */
+ GXIO_MPIPE_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT,
+
+ /* L3 cache fills should only populate IO cache ways. */
+ GXIO_MPIPE_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN,
+};
+
+/* An ingress packet descriptor. When a packet arrives, the mPIPE
+ * hardware generates this structure and writes it into a NotifRing.
+ */
+typedef MPIPE_PDESC_t gxio_mpipe_idesc_t;
+
+/* An egress command descriptor. Applications write this structure
+ * into eDMA rings and the hardware performs the indicated operation
+ * (normally involving egressing some bytes). Note that egressing a
+ * single packet may involve multiple egress command descriptors.
+ */
+typedef MPIPE_EDMA_DESC_t gxio_mpipe_edesc_t;
+
+/*
+ * Max # of mpipe instances. 2 currently.
+ */
+#define GXIO_MPIPE_INSTANCE_MAX HV_MPIPE_INSTANCE_MAX
+
+#define NR_MPIPE_MAX GXIO_MPIPE_INSTANCE_MAX
+
+/* Get the "va" field from an "idesc".
+ *
+ * This is the address at which the ingress hardware copied the first
+ * byte of the packet.
+ *
+ * If the classifier detected a custom header, then this will point to
+ * the custom header, and gxio_mpipe_idesc_get_l2_start() will point
+ * to the actual L2 header.
+ *
+ * Note that this value may be misleading if "idesc->be" is set.
+ *
+ * @param idesc An ingress packet descriptor.
+ */
+static inline unsigned char *gxio_mpipe_idesc_get_va(gxio_mpipe_idesc_t *idesc)
+{
+ return (unsigned char *)(long)idesc->va;
+}
+
+/* Get the "xfer_size" from an "idesc".
+ *
+ * This is the actual number of packet bytes transferred into memory
+ * by the hardware.
+ *
+ * Note that this value may be misleading if "idesc->be" is set.
+ *
+ * @param idesc An ingress packet descriptor.
+ *
+ * ISSUE: Is this the best name for this?
+ * FIXME: Add more docs about chaining, clipping, etc.
+ */
+static inline unsigned int gxio_mpipe_idesc_get_xfer_size(gxio_mpipe_idesc_t
+ *idesc)
+{
+ return idesc->l2_size;
+}
+
+/* Get the "l2_offset" from an "idesc".
+ *
+ * Extremely customized classifiers might not support this function.
+ *
+ * This is the number of bytes between the "va" and the L2 header.
+ *
+ * The L2 header consists of a destination mac address, a source mac
+ * address, and an initial ethertype. Various initial ethertypes
+ * allow encoding extra information in the L2 header, often including
+ * a vlan, and/or a new ethertype.
+ *
+ * Note that the "l2_offset" will be non-zero if (and only if) the
+ * classifier processed a custom header for the packet.
+ *
+ * @param idesc An ingress packet descriptor.
+ */
+static inline uint8_t gxio_mpipe_idesc_get_l2_offset(gxio_mpipe_idesc_t *idesc)
+{
+ return (idesc->custom1 >> 32) & 0xFF;
+}
+
+/* Get the "l2_start" from an "idesc".
+ *
+ * This is simply gxio_mpipe_idesc_get_va() plus
+ * gxio_mpipe_idesc_get_l2_offset().
+ *
+ * @param idesc An ingress packet descriptor.
+ */
+static inline unsigned char *gxio_mpipe_idesc_get_l2_start(gxio_mpipe_idesc_t
+ *idesc)
+{
+ unsigned char *va = gxio_mpipe_idesc_get_va(idesc);
+ return va + gxio_mpipe_idesc_get_l2_offset(idesc);
+}
+
+/* Get the "l2_length" from an "idesc".
+ *
+ * This is simply gxio_mpipe_idesc_get_xfer_size() minus
+ * gxio_mpipe_idesc_get_l2_offset().
+ *
+ * @param idesc An ingress packet descriptor.
+ */
+static inline unsigned int gxio_mpipe_idesc_get_l2_length(gxio_mpipe_idesc_t
+ *idesc)
+{
+ unsigned int xfer_size = idesc->l2_size;
+ return xfer_size - gxio_mpipe_idesc_get_l2_offset(idesc);
+}
+
+/* A context object used to manage mPIPE hardware resources. */
+typedef struct {
+
+ /* File descriptor for calling up to Linux (and thus the HV). */
+ int fd;
+
+ /* Corresponding mpipe instance #. */
+ int instance;
+
+ /* The VA at which configuration registers are mapped. */
+ char *mmio_cfg_base;
+
+ /* The VA at which IDMA, EDMA, and buffer manager are mapped. */
+ char *mmio_fast_base;
+
+ /* The "initialized" buffer stacks. */
+ gxio_mpipe_rules_stacks_t __stacks;
+
+} gxio_mpipe_context_t;
+
+/* This is only used internally, but it's most easily made visible here. */
+typedef gxio_mpipe_context_t gxio_mpipe_info_context_t;
+
+/* Initialize an mPIPE context.
+ *
+ * This function allocates an mPIPE "service domain" and maps the MMIO
+ * registers into the caller's VA space.
+ *
+ * @param context Context object to be initialized.
+ * @param mpipe_instance Instance number of mPIPE shim to be controlled via
+ * context.
+ */
+extern int gxio_mpipe_init(gxio_mpipe_context_t *context,
+ unsigned int mpipe_instance);
+
+/* Destroy an mPIPE context.
+ *
+ * This function frees the mPIPE "service domain" and unmaps the MMIO
+ * registers from the caller's VA space.
+ *
+ * If a user process exits without calling this routine, the kernel
+ * will destroy the mPIPE context as part of process teardown.
+ *
+ * @param context Context object to be destroyed.
+ */
+extern int gxio_mpipe_destroy(gxio_mpipe_context_t *context);
+
+/*****************************************************************
+ * Buffer Stacks *
+ ******************************************************************/
+
+/* Allocate a set of buffer stacks.
+ *
+ * The return value is NOT interesting if count is zero.
+ *
+ * @param context An initialized mPIPE context.
+ * @param count Number of stacks required.
+ * @param first Index of first stack if ::GXIO_MPIPE_ALLOC_FIXED flag is set,
+ * otherwise ignored.
+ * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
+ * @return Index of first allocated buffer stack, or
+ * ::GXIO_MPIPE_ERR_NO_BUFFER_STACK if allocation failed.
+ */
+extern int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
+ unsigned int count,
+ unsigned int first,
+ unsigned int flags);
+
+/* Enum codes for buffer sizes supported by mPIPE. */
+typedef enum {
+ /* 128 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_128 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128,
+ /* 256 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_256 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256,
+ /* 512 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_512 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512,
+ /* 1024 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_1024 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024,
+ /* 1664 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_1664 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664,
+ /* 4096 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_4096 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096,
+ /* 10368 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_10368 =
+ MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368,
+ /* 16384 byte packet data buffer. */
+ GXIO_MPIPE_BUFFER_SIZE_16384 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384
+} gxio_mpipe_buffer_size_enum_t;
+
+/* Convert a buffer size in bytes into a buffer size enum. */
+extern gxio_mpipe_buffer_size_enum_t
+gxio_mpipe_buffer_size_to_buffer_size_enum(size_t size);
+
+/* Convert a buffer size enum into a buffer size in bytes. */
+extern size_t
+gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t
+ buffer_size_enum);
+
+/* Calculate the number of bytes required to store a given number of
+ * buffers in the memory registered with a buffer stack via
+ * gxio_mpipe_init_buffer_stack().
+ */
+extern size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers);
+
+/* Initialize a buffer stack. This function binds a region of memory
+ * to be used by the hardware for storing buffer addresses pushed via
+ * gxio_mpipe_push_buffer() or as the result of sending a buffer out
+ * the egress with the 'push to stack when done' bit set. Once this
+ * function returns, the memory region's contents may be arbitrarily
+ * modified by the hardware at any time and software should not access
+ * the memory region again.
+ *
+ * @param context An initialized mPIPE context.
+ * @param stack The buffer stack index.
+ * @param buffer_size_enum The size of each buffer in the buffer stack,
+ * as an enum.
+ * @param mem The address of the buffer stack. This memory must be
+ * physically contiguous and aligned to a 64kB boundary.
+ * @param mem_size The size of the buffer stack, in bytes.
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
+ * @return Zero on success, ::GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE if
+ * buffer_size_enum is invalid, ::GXIO_MPIPE_ERR_BAD_BUFFER_STACK if
+ * stack has not been allocated.
+ */
+extern int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context,
+ unsigned int stack,
+ gxio_mpipe_buffer_size_enum_t
+ buffer_size_enum, void *mem,
+ size_t mem_size,
+ unsigned int mem_flags);
+
+/* Push a buffer onto a previously initialized buffer stack.
+ *
+ * The size of the buffer being pushed must match the size that was
+ * registered with gxio_mpipe_init_buffer_stack(). All packet buffer
+ * addresses are 128-byte aligned; the low 7 bits of the specified
+ * buffer address will be ignored.
+ *
+ * @param context An initialized mPIPE context.
+ * @param stack The buffer stack index.
+ * @param buffer The buffer (the low seven bits are ignored).
+ */
+static inline void gxio_mpipe_push_buffer(gxio_mpipe_context_t *context,
+ unsigned int stack, void *buffer)
+{
+ MPIPE_BSM_REGION_ADDR_t offset = { {0} };
+ MPIPE_BSM_REGION_VAL_t val = { {0} };
+
+ /*
+ * The mmio_fast_base region starts at the IDMA region, so subtract
+ * off that initial offset.
+ */
+ offset.region =
+ MPIPE_MMIO_ADDR__REGION_VAL_BSM -
+ MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
+ offset.stack = stack;
+
+#if __SIZEOF_POINTER__ == 4
+ val.va = ((ulong) buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT;
+#else
+ val.va = ((long)buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT;
+#endif
+
+ __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word);
+}
+
+/* Pop a buffer off of a previously initialized buffer stack.
+ *
+ * @param context An initialized mPIPE context.
+ * @param stack The buffer stack index.
+ * @return The buffer, or NULL if the stack is empty.
+ */
+static inline void *gxio_mpipe_pop_buffer(gxio_mpipe_context_t *context,
+ unsigned int stack)
+{
+ MPIPE_BSM_REGION_ADDR_t offset = { {0} };
+
+ /*
+ * The mmio_fast_base region starts at the IDMA region, so subtract
+ * off that initial offset.
+ */
+ offset.region =
+ MPIPE_MMIO_ADDR__REGION_VAL_BSM -
+ MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
+ offset.stack = stack;
+
+ while (1) {
+ /*
+ * Case 1: val.c == ..._UNCHAINED, va is non-zero.
+ * Case 2: val.c == ..._INVALID, va is zero.
+ * Case 3: val.c == ..._NOT_RDY, va is zero.
+ */
+ MPIPE_BSM_REGION_VAL_t val;
+ val.word =
+ __gxio_mmio_read(context->mmio_fast_base +
+ offset.word);
+
+ /*
+ * Handle case 1 and 2 by returning the buffer (or NULL).
+ * Handle case 3 by waiting for the prefetch buffer to refill.
+ */
+ if (val.c != MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY)
+ return (void *)((unsigned long)val.
+ va << MPIPE_BSM_REGION_VAL__VA_SHIFT);
+ }
+}
+
+/*****************************************************************
+ * NotifRings *
+ ******************************************************************/
+
+/* Allocate a set of NotifRings.
+ *
+ * The return value is NOT interesting if count is zero.
+ *
+ * Note that NotifRings are allocated in chunks, so allocating one at
+ * a time is much less efficient than allocating several at once.
+ *
+ * @param context An initialized mPIPE context.
+ * @param count Number of NotifRings required.
+ * @param first Index of first NotifRing if ::GXIO_MPIPE_ALLOC_FIXED flag
+ * is set, otherwise ignored.
+ * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
+ * @return Index of first allocated buffer NotifRing, or
+ * ::GXIO_MPIPE_ERR_NO_NOTIF_RING if allocation failed.
+ */
+extern int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+/* Initialize a NotifRing, using the given memory and size.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ring The NotifRing index.
+ * @param mem A physically contiguous region of memory to be filled
+ * with a ring of ::gxio_mpipe_idesc_t structures.
+ * @param mem_size Number of bytes in the ring. Must be 128, 512,
+ * 2048, or 65536 * sizeof(gxio_mpipe_idesc_t).
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
+ *
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_NOTIF_RING or
+ * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
+ */
+extern int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context,
+ unsigned int ring,
+ void *mem, size_t mem_size,
+ unsigned int mem_flags);
+
+/* Configure an interrupt to be sent to a tile on incoming NotifRing
+ * traffic. Once an interrupt is sent for a particular ring, no more
+ * will be sent until gxio_mica_enable_notif_ring_interrupt() is called.
+ *
+ * @param context An initialized mPIPE context.
+ * @param x X coordinate of interrupt target tile.
+ * @param y Y coordinate of interrupt target tile.
+ * @param i Index of the IPI register which will receive the interrupt.
+ * @param e Specific event which will be set in the target IPI register when
+ * the interrupt occurs.
+ * @param ring The NotifRing index.
+ * @return Zero on success, GXIO_ERR_INVAL if params are out of range.
+ */
+extern int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t
+ *context, int x, int y,
+ int i, int e,
+ unsigned int ring);
+
+/* Enable an interrupt on incoming NotifRing traffic.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ring The NotifRing index.
+ * @return Zero on success, GXIO_ERR_INVAL if params are out of range.
+ */
+extern int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t
+ *context, unsigned int ring);
+
+/* Map all of a client's memory via the given IOTLB.
+ * @param context An initialized mPIPE context.
+ * @param iotlb IOTLB index.
+ * @param pte Page table entry.
+ * @param flags Flags.
+ * @return Zero on success, or a negative error code.
+ */
+extern int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
+ unsigned int iotlb, HV_PTE pte,
+ unsigned int flags);
+
+/*****************************************************************
+ * Notif Groups *
+ ******************************************************************/
+
+/* Allocate a set of NotifGroups.
+ *
+ * The return value is NOT interesting if count is zero.
+ *
+ * @param context An initialized mPIPE context.
+ * @param count Number of NotifGroups required.
+ * @param first Index of first NotifGroup if ::GXIO_MPIPE_ALLOC_FIXED flag
+ * is set, otherwise ignored.
+ * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
+ * @return Index of first allocated buffer NotifGroup, or
+ * ::GXIO_MPIPE_ERR_NO_NOTIF_GROUP if allocation failed.
+ */
+extern int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
+ unsigned int count,
+ unsigned int first,
+ unsigned int flags);
+
+/* Add a NotifRing to a NotifGroup. This only sets a bit in the
+ * application's 'group' object; the hardware NotifGroup can be
+ * initialized by passing 'group' to gxio_mpipe_init_notif_group() or
+ * gxio_mpipe_init_notif_group_and_buckets().
+ */
+static inline void
+gxio_mpipe_notif_group_add_ring(gxio_mpipe_notif_group_bits_t *bits, int ring)
+{
+ bits->ring_mask[ring / 64] |= (1ull << (ring % 64));
+}
+
+/* Set a particular NotifGroup bitmask. Since the load balancer
+ * makes decisions based on both bucket and NotifGroup state, most
+ * applications should use gxio_mpipe_init_notif_group_and_buckets()
+ * rather than using this function to configure just a NotifGroup.
+ */
+extern int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
+ unsigned int group,
+ gxio_mpipe_notif_group_bits_t bits);
+
+/*****************************************************************
+ * Load Balancer *
+ ******************************************************************/
+
+/* Allocate a set of load balancer buckets.
+ *
+ * The return value is NOT interesting if count is zero.
+ *
+ * Note that buckets are allocated in chunks, so allocating one at
+ * a time is much less efficient than allocating several at once.
+ *
+ * Note that the buckets are actually divided into two sub-ranges, of
+ * different sizes, and different chunk sizes, and the range you get
+ * by default is determined by the size of the request. Allocations
+ * cannot span the two sub-ranges.
+ *
+ * @param context An initialized mPIPE context.
+ * @param count Number of buckets required.
+ * @param first Index of first bucket if ::GXIO_MPIPE_ALLOC_FIXED flag is set,
+ * otherwise ignored.
+ * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
+ * @return Index of first allocated buffer bucket, or
+ * ::GXIO_MPIPE_ERR_NO_BUCKET if allocation failed.
+ */
+extern int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+/* The legal modes for gxio_mpipe_bucket_info_t and
+ * gxio_mpipe_init_notif_group_and_buckets().
+ *
+ * All modes except ::GXIO_MPIPE_BUCKET_ROUND_ROBIN expect that the user
+ * will allocate a power-of-two number of buckets and initialize them
+ * to the same mode. The classifier program then uses the appropriate
+ * number of low bits from the incoming packet's flow hash to choose a
+ * load balancer bucket. Based on that bucket's load balancing mode,
+ * reference count, and currently active NotifRing, the load balancer
+ * chooses the NotifRing to which the packet will be delivered.
+ */
+typedef enum {
+ /* All packets for a bucket go to the same NotifRing unless the
+ * NotifRing gets full, in which case packets will be dropped. If
+ * the bucket reference count ever reaches zero, a new NotifRing may
+ * be chosen.
+ */
+ GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY =
+ MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA,
+
+ /* All packets for a bucket always go to the same NotifRing.
+ */
+ GXIO_MPIPE_BUCKET_STATIC_FLOW_AFFINITY =
+ MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED,
+
+ /* All packets for a bucket go to the least full NotifRing in the
+ * group, providing load balancing round robin behavior.
+ */
+ GXIO_MPIPE_BUCKET_ROUND_ROBIN =
+ MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK,
+
+ /* All packets for a bucket go to the same NotifRing unless the
+ * NotifRing gets full, at which point the bucket starts using the
+ * least full NotifRing in the group. If all NotifRings in the
+ * group are full, packets will be dropped.
+ */
+ GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY =
+ MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY,
+
+ /* All packets for a bucket go to the same NotifRing unless the
+ * NotifRing gets full, or a random timer fires, at which point the
+ * bucket starts using the least full NotifRing in the group. If
+ * all NotifRings in the group are full, packets will be dropped.
+ * WARNING: This mode is BROKEN on chips with fewer than 64 tiles.
+ */
+ GXIO_MPIPE_BUCKET_PREFER_FLOW_LOCALITY =
+ MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND,
+
+} gxio_mpipe_bucket_mode_t;
+
+/* Copy a set of bucket initialization values into the mPIPE
+ * hardware. Since the load balancer makes decisions based on both
+ * bucket and NotifGroup state, most applications should use
+ * gxio_mpipe_init_notif_group_and_buckets() rather than using this
+ * function to configure a single bucket.
+ *
+ * @param context An initialized mPIPE context.
+ * @param bucket Bucket index to be initialized.
+ * @param bucket_info Initial reference count, NotifRing index, and mode.
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET on failure.
+ */
+extern int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context,
+ unsigned int bucket,
+ gxio_mpipe_bucket_info_t bucket_info);
+
+/* Initializes a group and range of buckets and range of rings such
+ * that the load balancer runs a particular load balancing function.
+ *
+ * First, the group is initialized with the given rings.
+ *
+ * Second, each bucket is initialized with the mode and group, and a
+ * ring chosen round-robin from the given rings.
+ *
+ * Normally, the classifier picks a bucket, and then the load balancer
+ * picks a ring, based on the bucket's mode, group, and current ring,
+ * possibly updating the bucket's ring.
+ *
+ * @param context An initialized mPIPE context.
+ * @param group The group.
+ * @param ring The first ring.
+ * @param num_rings The number of rings.
+ * @param bucket The first bucket.
+ * @param num_buckets The number of buckets.
+ * @param mode The load balancing mode.
+ *
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET,
+ * ::GXIO_MPIPE_ERR_BAD_NOTIF_GROUP, or
+ * ::GXIO_MPIPE_ERR_BAD_NOTIF_RING on failure.
+ */
+extern int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t
+ *context,
+ unsigned int group,
+ unsigned int ring,
+ unsigned int num_rings,
+ unsigned int bucket,
+ unsigned int num_buckets,
+ gxio_mpipe_bucket_mode_t
+ mode);
+
+/* Return credits to a NotifRing and/or bucket.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ring The NotifRing index, or -1.
+ * @param bucket The bucket, or -1.
+ * @param count The number of credits to return.
+ */
+static inline void gxio_mpipe_credit(gxio_mpipe_context_t *context,
+ int ring, int bucket, unsigned int count)
+{
+ /* NOTE: Fancy struct initialization would break "C89" header test. */
+
+ MPIPE_IDMA_RELEASE_REGION_ADDR_t offset = { {0} };
+ MPIPE_IDMA_RELEASE_REGION_VAL_t val = { {0} };
+
+ /*
+ * The mmio_fast_base region starts at the IDMA region, so subtract
+ * off that initial offset.
+ */
+ offset.region =
+ MPIPE_MMIO_ADDR__REGION_VAL_IDMA -
+ MPIPE_MMIO_ADDR__REGION_VAL_IDMA;
+ offset.ring = ring;
+ offset.bucket = bucket;
+ offset.ring_enable = (ring >= 0);
+ offset.bucket_enable = (bucket >= 0);
+ val.count = count;
+
+ __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word);
+}
+
+/*****************************************************************
+ * Egress Rings *
+ ******************************************************************/
+
+/* Allocate a set of eDMA rings.
+ *
+ * The return value is NOT interesting if count is zero.
+ *
+ * @param context An initialized mPIPE context.
+ * @param count Number of eDMA rings required.
+ * @param first Index of first eDMA ring if ::GXIO_MPIPE_ALLOC_FIXED flag
+ * is set, otherwise ignored.
+ * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e.
+ * @return Index of first allocated buffer eDMA ring, or
+ * ::GXIO_MPIPE_ERR_NO_EDMA_RING if allocation failed.
+ */
+extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+/* Initialize an eDMA ring, using the given memory and size.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ering The eDMA ring index.
+ * @param channel The channel to use. This must be one of the channels
+ * associated with the context's set of open links.
+ * @param mem A physically contiguous region of memory to be filled
+ * with a ring of ::gxio_mpipe_edesc_t structures.
+ * @param mem_size Number of bytes in the ring. Must be 512, 2048,
+ * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)).
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
+ *
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or
+ * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
+ */
+extern int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context,
+ unsigned int ering, unsigned int channel,
+ void *mem, size_t mem_size,
+ unsigned int mem_flags);
+
+/* Set the "max_blks", "min_snf_blks", and "db" fields of
+ * ::MPIPE_EDMA_RG_INIT_DAT_THRESH_t for a given edma ring.
+ *
+ * The global pool of dynamic blocks will be automatically adjusted.
+ *
+ * This function should not be called after any egress has been done
+ * on the edma ring.
+ *
+ * Most applications should just use gxio_mpipe_equeue_set_snf_size().
+ *
+ * @param context An initialized mPIPE context.
+ * @param ering The eDMA ring index.
+ * @param max_blks The number of blocks to dedicate to the ring
+ * (normally min_snf_blks + 1). Must be greater than min_snf_blocks.
+ * @param min_snf_blks The number of blocks which must be stored
+ * prior to starting to send the packet (normally 12).
+ * @param db Whether to allow use of dynamic blocks by the ring
+ * (normally 1).
+ *
+ * @return 0 on success, negative on error.
+ */
+extern int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context,
+ unsigned int ering,
+ unsigned int max_blks,
+ unsigned int min_snf_blks,
+ unsigned int db);
+
+/*****************************************************************
+ * Classifier Program *
+ ******************************************************************/
+
+/*
+ *
+ * Functions for loading or configuring the mPIPE classifier program.
+ *
+ * The mPIPE classification processors all run a special "classifier"
+ * program which, for each incoming packet, parses the packet headers,
+ * encodes some packet metadata in the "idesc", and either drops the
+ * packet, or picks a notif ring to handle the packet, and a buffer
+ * stack to contain the packet, usually based on the channel, VLAN,
+ * dMAC, flow hash, and packet size, under the guidance of the "rules"
+ * API described below.
+ *
+ * @section gxio_mpipe_classifier_default Default Classifier
+ *
+ * The MDE provides a simple "default" classifier program. It is
+ * shipped as source in "$TILERA_ROOT/src/sys/mpipe/classifier.c",
+ * which serves as its official documentation. It is shipped as a
+ * binary program in "$TILERA_ROOT/tile/boot/classifier", which is
+ * automatically included in bootroms created by "tile-monitor", and
+ * is automatically loaded by the hypervisor at boot time.
+ *
+ * The L2 analysis handles LLC packets, SNAP packets, and "VLAN
+ * wrappers" (keeping the outer VLAN).
+ *
+ * The L3 analysis handles IPv4 and IPv6, dropping packets with bad
+ * IPv4 header checksums, requesting computation of a TCP/UDP checksum
+ * if appropriate, and hashing the dest and src IP addresses, plus the
+ * ports for TCP/UDP packets, into the flow hash. No special analysis
+ * is done for "fragmented" packets or "tunneling" protocols. Thus,
+ * the first fragment of a fragmented TCP/UDP packet is hashed using
+ * src/dest IP address and ports and all subsequent fragments are only
+ * hashed according to src/dest IP address.
+ *
+ * The L3 analysis handles other packets too, hashing the dMAC
+ * smac into a flow hash.
+ *
+ * The channel, VLAN, and dMAC used to pick a "rule" (see the
+ * "rules" APIs below), which in turn is used to pick a buffer stack
+ * (based on the packet size) and a bucket (based on the flow hash).
+ *
+ * To receive traffic matching a particular (channel/VLAN/dMAC
+ * pattern, an application should allocate its own buffer stacks and
+ * load balancer buckets, and map traffic to those stacks and buckets,
+ * as decribed by the "rules" API below.
+ *
+ * Various packet metadata is encoded in the idesc. The flow hash is
+ * four bytes at 0x0C. The VLAN is two bytes at 0x10. The ethtype is
+ * two bytes at 0x12. The l3 start is one byte at 0x14. The l4 start
+ * is one byte at 0x15 for IPv4 and IPv6 packets, and otherwise zero.
+ * The protocol is one byte at 0x16 for IPv4 and IPv6 packets, and
+ * otherwise zero.
+ *
+ * @section gxio_mpipe_classifier_custom Custom Classifiers.
+ *
+ * A custom classifier may be created using "tile-mpipe-cc" with a
+ * customized version of the default classifier sources.
+ *
+ * The custom classifier may be included in bootroms using the
+ * "--classifier" option to "tile-monitor", or loaded dynamically
+ * using gxio_mpipe_classifier_load_from_file().
+ *
+ * Be aware that "extreme" customizations may break the assumptions of
+ * the "rules" APIs described below, but simple customizations, such
+ * as adding new packet metadata, should be fine.
+ */
+
+/* A set of classifier rules, plus a context. */
+typedef struct {
+
+ /* The context. */
+ gxio_mpipe_context_t *context;
+
+ /* The actual rules. */
+ gxio_mpipe_rules_list_t list;
+
+} gxio_mpipe_rules_t;
+
+/* Initialize a classifier program rules list.
+ *
+ * This function can be called on a previously initialized rules list
+ * to discard any previously added rules.
+ *
+ * @param rules Rules list to initialize.
+ * @param context An initialized mPIPE context.
+ */
+extern void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules,
+ gxio_mpipe_context_t *context);
+
+/* Begin a new rule on the indicated rules list.
+ *
+ * Note that an empty rule matches all packets, but an empty rule list
+ * matches no packets.
+ *
+ * @param rules Rules list to which new rule is appended.
+ * @param bucket First load balancer bucket to which packets will be
+ * delivered.
+ * @param num_buckets Number of buckets (must be a power of two) across
+ * which packets will be distributed based on the "flow hash".
+ * @param stacks Either NULL, to assign each packet to the smallest
+ * initialized buffer stack which does not induce chaining (and to
+ * drop packets which exceed the largest initialized buffer stack
+ * buffer size), or an array, with each entry indicating which buffer
+ * stack should be used for packets up to that size (with 255
+ * indicating that those packets should be dropped).
+ * @return 0 on success, or a negative error code on failure.
+ */
+extern int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules,
+ unsigned int bucket,
+ unsigned int num_buckets,
+ gxio_mpipe_rules_stacks_t *stacks);
+
+/* Set the headroom of the current rule.
+ *
+ * @param rules Rules list whose current rule will be modified.
+ * @param headroom The headroom.
+ * @return 0 on success, or a negative error code on failure.
+ */
+extern int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules,
+ uint8_t headroom);
+
+/* Indicate that packets from a particular channel can be delivered
+ * to the buckets and buffer stacks associated with the current rule.
+ *
+ * Channels added must be associated with links opened by the mPIPE context
+ * used in gxio_mpipe_rules_init(). A rule with no channels is equivalent
+ * to a rule naming all such associated channels.
+ *
+ * @param rules Rules list whose current rule will be modified.
+ * @param channel The channel to add.
+ * @return 0 on success, or a negative error code on failure.
+ */
+extern int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules,
+ unsigned int channel);
+
+/* Commit rules.
+ *
+ * The rules are sent to the hypervisor, where they are combined with
+ * the rules from other apps, and used to program the hardware classifier.
+ *
+ * Note that if this function returns an error, then the rules will NOT
+ * have been committed, even if the error is due to interactions with
+ * rules from another app.
+ *
+ * @param rules Rules list to commit.
+ * @return 0 on success, or a negative error code on failure.
+ */
+extern int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules);
+
+/*****************************************************************
+ * Ingress Queue Wrapper *
+ ******************************************************************/
+
+/*
+ *
+ * Convenience functions for receiving packets from a NotifRing and
+ * sending packets via an eDMA ring.
+ *
+ * The mpipe ingress and egress hardware uses shared memory packet
+ * descriptors to describe packets that have arrived on ingress or
+ * are destined for egress. These descriptors are stored in shared
+ * memory ring buffers and written or read by hardware as necessary.
+ * The gxio library provides wrapper functions that manage the head and
+ * tail pointers for these rings, allowing the user to easily read or
+ * write packet descriptors.
+ *
+ * The initialization interface for ingress and egress rings is quite
+ * similar. For example, to create an ingress queue, the user passes
+ * a ::gxio_mpipe_iqueue_t state object, a ring number from
+ * gxio_mpipe_alloc_notif_rings(), and the address of memory to hold a
+ * ring buffer to the gxio_mpipe_iqueue_init() function. The function
+ * returns success when the state object has been initialized and the
+ * hardware configured to deliver packets to the specified ring
+ * buffer. Similarly, gxio_mpipe_equeue_init() takes a
+ * ::gxio_mpipe_equeue_t state object, a ring number from
+ * gxio_mpipe_alloc_edma_rings(), and a shared memory buffer.
+ *
+ * @section gxio_mpipe_iqueue Working with Ingress Queues
+ *
+ * Once initialized, the gxio_mpipe_iqueue_t API provides two flows
+ * for getting the ::gxio_mpipe_idesc_t packet descriptor associated
+ * with incoming packets. The simplest is to call
+ * gxio_mpipe_iqueue_get() or gxio_mpipe_iqueue_try_get(). These
+ * functions copy the oldest packet descriptor out of the NotifRing and
+ * into a descriptor provided by the caller. They also immediately
+ * inform the hardware that a descriptor has been processed.
+ *
+ * For applications with stringent performance requirements, higher
+ * efficiency can be achieved by avoiding the packet descriptor copy
+ * and processing multiple descriptors at once. The
+ * gxio_mpipe_iqueue_peek() and gxio_mpipe_iqueue_try_peek() functions
+ * allow such optimizations. These functions provide a pointer to the
+ * next valid ingress descriptor in the NotifRing's shared memory ring
+ * buffer, and a count of how many contiguous descriptors are ready to
+ * be processed. The application can then process any number of those
+ * descriptors in place, calling gxio_mpipe_iqueue_consume() to inform
+ * the hardware after each one has been processed.
+ *
+ * @section gxio_mpipe_equeue Working with Egress Queues
+ *
+ * Similarly, the egress queue API provides a high-performance
+ * interface plus a simple wrapper for use in posting
+ * ::gxio_mpipe_edesc_t egress packet descriptors. The simple
+ * version, gxio_mpipe_equeue_put(), allows the programmer to wait for
+ * an eDMA ring slot to become available and write a single descriptor
+ * into the ring.
+ *
+ * Alternatively, you can reserve slots in the eDMA ring using
+ * gxio_mpipe_equeue_reserve() or gxio_mpipe_equeue_try_reserve(), and
+ * then fill in each slot using gxio_mpipe_equeue_put_at(). This
+ * capability can be used to amortize the cost of reserving slots
+ * across several packets. It also allows gather operations to be
+ * performed on a shared equeue, by ensuring that the edescs for all
+ * the fragments are all contiguous in the eDMA ring.
+ *
+ * The gxio_mpipe_equeue_reserve() and gxio_mpipe_equeue_try_reserve()
+ * functions return a 63-bit "completion slot", which is actually a
+ * sequence number, the low bits of which indicate the ring buffer
+ * index and the high bits the number of times the application has
+ * gone around the egress ring buffer. The extra bits allow an
+ * application to check for egress completion by calling
+ * gxio_mpipe_equeue_is_complete() to see whether a particular 'slot'
+ * number has finished. Given the maximum packet rates of the Gx
+ * processor, the 63-bit slot number will never wrap.
+ *
+ * In practice, most applications use the ::gxio_mpipe_edesc_t::hwb
+ * bit to indicate that the buffers containing egress packet data
+ * should be pushed onto a buffer stack when egress is complete. Such
+ * applications generally do not need to know when an egress operation
+ * completes (since there is no need to free a buffer post-egress),
+ * and thus can use the optimized gxio_mpipe_equeue_reserve_fast() or
+ * gxio_mpipe_equeue_try_reserve_fast() functions, which return a 24
+ * bit "slot", instead of a 63-bit "completion slot".
+ *
+ * Once a slot has been "reserved", it MUST be filled. If the
+ * application reserves a slot and then decides that it does not
+ * actually need it, it can set the ::gxio_mpipe_edesc_t::ns (no send)
+ * bit on the descriptor passed to gxio_mpipe_equeue_put_at() to
+ * indicate that no data should be sent. This technique can also be
+ * used to drop an incoming packet, instead of forwarding it, since
+ * any buffer will still be pushed onto the buffer stack when the
+ * egress descriptor is processed.
+ */
+
+/* A convenient interface to a NotifRing, for use by a single thread.
+ */
+typedef struct {
+
+ /* The context. */
+ gxio_mpipe_context_t *context;
+
+ /* The actual NotifRing. */
+ gxio_mpipe_idesc_t *idescs;
+
+ /* The number of entries. */
+ unsigned long num_entries;
+
+ /* The number of entries minus one. */
+ unsigned long mask_num_entries;
+
+ /* The log2() of the number of entries. */
+ unsigned long log2_num_entries;
+
+ /* The next entry. */
+ unsigned int head;
+
+ /* The NotifRing id. */
+ unsigned int ring;
+
+#ifdef __BIG_ENDIAN__
+ /* The number of byteswapped entries. */
+ unsigned int swapped;
+#endif
+
+} gxio_mpipe_iqueue_t;
+
+/* Initialize an "iqueue".
+ *
+ * Takes the iqueue plus the same args as gxio_mpipe_init_notif_ring().
+ */
+extern int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_context_t *context,
+ unsigned int ring,
+ void *mem, size_t mem_size,
+ unsigned int mem_flags);
+
+/* Advance over some old entries in an iqueue.
+ *
+ * Please see the documentation for gxio_mpipe_iqueue_consume().
+ *
+ * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
+ * @param count The number of entries to advance over.
+ */
+static inline void gxio_mpipe_iqueue_advance(gxio_mpipe_iqueue_t *iqueue,
+ int count)
+{
+ /* Advance with proper wrap. */
+ int head = iqueue->head + count;
+ iqueue->head =
+ (head & iqueue->mask_num_entries) +
+ (head >> iqueue->log2_num_entries);
+
+#ifdef __BIG_ENDIAN__
+ /* HACK: Track swapped entries. */
+ iqueue->swapped -= count;
+#endif
+}
+
+/* Release the ring and bucket for an old entry in an iqueue.
+ *
+ * Releasing the ring allows more packets to be delivered to the ring.
+ *
+ * Releasing the bucket allows flows using the bucket to be moved to a
+ * new ring when using GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY.
+ *
+ * This function is shorthand for "gxio_mpipe_credit(iqueue->context,
+ * iqueue->ring, idesc->bucket_id, 1)", and it may be more convenient
+ * to make that underlying call, using those values, instead of
+ * tracking the entire "idesc".
+ *
+ * If packet processing is deferred, optimal performance requires that
+ * the releasing be deferred as well.
+ *
+ * Please see the documentation for gxio_mpipe_iqueue_consume().
+ *
+ * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
+ * @param idesc The descriptor which was processed.
+ */
+static inline void gxio_mpipe_iqueue_release(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_idesc_t *idesc)
+{
+ gxio_mpipe_credit(iqueue->context, iqueue->ring, idesc->bucket_id, 1);
+}
+
+/* Consume a packet from an "iqueue".
+ *
+ * After processing packets peeked at via gxio_mpipe_iqueue_peek()
+ * or gxio_mpipe_iqueue_try_peek(), you must call this function, or
+ * gxio_mpipe_iqueue_advance() plus gxio_mpipe_iqueue_release(), to
+ * advance over those entries, and release their rings and buckets.
+ *
+ * You may call this function as each packet is processed, or you can
+ * wait until several packets have been processed.
+ *
+ * Note that if you are using a single bucket, and you are handling
+ * batches of N packets, then you can replace several calls to this
+ * function with calls to "gxio_mpipe_iqueue_advance(iqueue, N)" and
+ * "gxio_mpipe_credit(iqueue->context, iqueue->ring, bucket, N)".
+ *
+ * Note that if your classifier sets "idesc->nr", then you should
+ * explicitly call "gxio_mpipe_iqueue_advance(iqueue, idesc)" plus
+ * "gxio_mpipe_credit(iqueue->context, iqueue->ring, -1, 1)", to
+ * avoid incorrectly crediting the (unused) bucket.
+ *
+ * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
+ * @param idesc The descriptor which was processed.
+ */
+static inline void gxio_mpipe_iqueue_consume(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_idesc_t *idesc)
+{
+ gxio_mpipe_iqueue_advance(iqueue, 1);
+ gxio_mpipe_iqueue_release(iqueue, idesc);
+}
+
+/* Peek at the next packet(s) in an "iqueue", without waiting.
+ *
+ * If no packets are available, fills idesc_ref with NULL, and then
+ * returns ::GXIO_MPIPE_ERR_IQUEUE_EMPTY. Otherwise, fills idesc_ref
+ * with the address of the next valid packet descriptor, and returns
+ * the maximum number of valid descriptors which can be processed.
+ * You may process fewer descriptors if desired.
+ *
+ * Call gxio_mpipe_iqueue_consume() on each packet once it has been
+ * processed (or dropped), to allow more packets to be delivered.
+ *
+ * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
+ * @param idesc_ref A pointer to a packet descriptor pointer.
+ * @return The (positive) number of packets which can be processed,
+ * or ::GXIO_MPIPE_ERR_IQUEUE_EMPTY if no packets are available.
+ */
+static inline int gxio_mpipe_iqueue_try_peek(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_idesc_t **idesc_ref)
+{
+ gxio_mpipe_idesc_t *next;
+
+ uint64_t head = iqueue->head;
+ uint64_t tail = __gxio_mmio_read(iqueue->idescs);
+
+ /* Available entries. */
+ uint64_t avail =
+ (tail >= head) ? (tail - head) : (iqueue->num_entries - head);
+
+ if (avail == 0) {
+ *idesc_ref = NULL;
+ return GXIO_MPIPE_ERR_IQUEUE_EMPTY;
+ }
+
+ next = &iqueue->idescs[head];
+
+ /* ISSUE: Is this helpful? */
+ __insn_prefetch(next);
+
+#ifdef __BIG_ENDIAN__
+ /* HACK: Swap new entries directly in memory. */
+ {
+ int i, j;
+ for (i = iqueue->swapped; i < avail; i++) {
+ for (j = 0; j < 8; j++)
+ next[i].words[j] =
+ __builtin_bswap64(next[i].words[j]);
+ }
+ iqueue->swapped = avail;
+ }
+#endif
+
+ *idesc_ref = next;
+
+ return avail;
+}
+
+/* Drop a packet by pushing its buffer (if appropriate).
+ *
+ * NOTE: The caller must still call gxio_mpipe_iqueue_consume() if idesc
+ * came from gxio_mpipe_iqueue_try_peek() or gxio_mpipe_iqueue_peek().
+ *
+ * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init().
+ * @param idesc A packet descriptor.
+ */
+static inline void gxio_mpipe_iqueue_drop(gxio_mpipe_iqueue_t *iqueue,
+ gxio_mpipe_idesc_t *idesc)
+{
+ /* FIXME: Handle "chaining" properly. */
+
+ if (!idesc->be) {
+ unsigned char *va = gxio_mpipe_idesc_get_va(idesc);
+ gxio_mpipe_push_buffer(iqueue->context, idesc->stack_idx, va);
+ }
+}
+
+/*****************************************************************
+ * Egress Queue Wrapper *
+ ******************************************************************/
+
+/* A convenient, thread-safe interface to an eDMA ring. */
+typedef struct {
+
+ /* State object for tracking head and tail pointers. */
+ __gxio_dma_queue_t dma_queue;
+
+ /* The ring entries. */
+ gxio_mpipe_edesc_t *edescs;
+
+ /* The number of entries minus one. */
+ unsigned long mask_num_entries;
+
+ /* The log2() of the number of entries. */
+ unsigned long log2_num_entries;
+
+ /* The context. */
+ gxio_mpipe_context_t *context;
+
+ /* The ering. */
+ unsigned int ering;
+
+ /* The channel. */
+ unsigned int channel;
+
+} gxio_mpipe_equeue_t;
+
+/* Initialize an "equeue".
+ *
+ * This function uses gxio_mpipe_init_edma_ring() to initialize the
+ * underlying edma_ring using the provided arguments.
+ *
+ * @param equeue An egress queue to be initialized.
+ * @param context An initialized mPIPE context.
+ * @param ering The eDMA ring index.
+ * @param channel The channel to use. This must be one of the channels
+ * associated with the context's set of open links.
+ * @param mem A physically contiguous region of memory to be filled
+ * with a ring of ::gxio_mpipe_edesc_t structures.
+ * @param mem_size Number of bytes in the ring. Must be 512, 2048,
+ * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)).
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags.
+ *
+ * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or
+ * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure.
+ */
+extern int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue,
+ gxio_mpipe_context_t *context,
+ unsigned int ering,
+ unsigned int channel,
+ void *mem, unsigned int mem_size,
+ unsigned int mem_flags);
+
+/* Reserve completion slots for edescs.
+ *
+ * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
+ *
+ * This function is slower than gxio_mpipe_equeue_reserve_fast(), but
+ * returns a full 64 bit completion slot, which can be used with
+ * gxio_mpipe_equeue_is_complete().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param num Number of slots to reserve (must be non-zero).
+ * @return The first reserved completion slot, or a negative error code.
+ */
+static inline int64_t gxio_mpipe_equeue_reserve(gxio_mpipe_equeue_t *equeue,
+ unsigned int num)
+{
+ return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, true);
+}
+
+/* Reserve completion slots for edescs, if possible.
+ *
+ * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
+ *
+ * This function is slower than gxio_mpipe_equeue_try_reserve_fast(),
+ * but returns a full 64 bit completion slot, which can be used with
+ * gxio_mpipe_equeue_is_complete().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param num Number of slots to reserve (must be non-zero).
+ * @return The first reserved completion slot, or a negative error code.
+ */
+static inline int64_t gxio_mpipe_equeue_try_reserve(gxio_mpipe_equeue_t
+ *equeue, unsigned int num)
+{
+ return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, false);
+}
+
+/* Reserve slots for edescs.
+ *
+ * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
+ *
+ * This function is faster than gxio_mpipe_equeue_reserve(), but
+ * returns a 24 bit slot (instead of a 64 bit completion slot), which
+ * thus cannot be used with gxio_mpipe_equeue_is_complete().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param num Number of slots to reserve (should be non-zero).
+ * @return The first reserved slot, or a negative error code.
+ */
+static inline int64_t gxio_mpipe_equeue_reserve_fast(gxio_mpipe_equeue_t
+ *equeue, unsigned int num)
+{
+ return __gxio_dma_queue_reserve(&equeue->dma_queue, num, true, false);
+}
+
+/* Reserve slots for edescs, if possible.
+ *
+ * Use gxio_mpipe_equeue_put_at() to actually populate the slots.
+ *
+ * This function is faster than gxio_mpipe_equeue_try_reserve(), but
+ * returns a 24 bit slot (instead of a 64 bit completion slot), which
+ * thus cannot be used with gxio_mpipe_equeue_is_complete().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param num Number of slots to reserve (should be non-zero).
+ * @return The first reserved slot, or a negative error code.
+ */
+static inline int64_t gxio_mpipe_equeue_try_reserve_fast(gxio_mpipe_equeue_t
+ *equeue,
+ unsigned int num)
+{
+ return __gxio_dma_queue_reserve(&equeue->dma_queue, num, false, false);
+}
+
+/*
+ * HACK: This helper function tricks gcc 4.6 into avoiding saving
+ * a copy of "edesc->words[0]" on the stack for no obvious reason.
+ */
+
+static inline void gxio_mpipe_equeue_put_at_aux(gxio_mpipe_equeue_t *equeue,
+ uint_reg_t ew[2],
+ unsigned long slot)
+{
+ unsigned long edma_slot = slot & equeue->mask_num_entries;
+ gxio_mpipe_edesc_t *edesc_p = &equeue->edescs[edma_slot];
+
+ /*
+ * ISSUE: Could set eDMA ring to be on generation 1 at start, which
+ * would avoid the negation here, perhaps allowing "__insn_bfins()".
+ */
+ ew[0] |= !((slot >> equeue->log2_num_entries) & 1);
+
+ /*
+ * NOTE: We use "__gxio_mpipe_write()", plus the fact that the eDMA
+ * queue alignment restrictions ensure that these two words are on
+ * the same cacheline, to force proper ordering between the stores.
+ */
+ __gxio_mmio_write64(&edesc_p->words[1], ew[1]);
+ __gxio_mmio_write64(&edesc_p->words[0], ew[0]);
+}
+
+/* Post an edesc to a given slot in an equeue.
+ *
+ * This function copies the supplied edesc into entry "slot mod N" in
+ * the underlying ring, setting the "gen" bit to the appropriate value
+ * based on "(slot mod N*2)", where "N" is the size of the ring. Note
+ * that the higher bits of slot are unused, and thus, this function
+ * can handle "slots" as well as "completion slots".
+ *
+ * Normally this function is used to fill in slots reserved by
+ * gxio_mpipe_equeue_try_reserve(), gxio_mpipe_equeue_reserve(),
+ * gxio_mpipe_equeue_try_reserve_fast(), or
+ * gxio_mpipe_equeue_reserve_fast(),
+ *
+ * This function can also be used without "reserving" slots, if the
+ * application KNOWS that the ring can never overflow, for example, by
+ * pushing fewer buffers into the buffer stacks than there are total
+ * slots in the equeue, but this is NOT recommended.
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param edesc The egress descriptor to be posted.
+ * @param slot An egress slot (only the low bits are actually used).
+ */
+static inline void gxio_mpipe_equeue_put_at(gxio_mpipe_equeue_t *equeue,
+ gxio_mpipe_edesc_t edesc,
+ unsigned long slot)
+{
+ gxio_mpipe_equeue_put_at_aux(equeue, edesc.words, slot);
+}
+
+/* Post an edesc to the next slot in an equeue.
+ *
+ * This is a convenience wrapper around
+ * gxio_mpipe_equeue_reserve_fast() and gxio_mpipe_equeue_put_at().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param edesc The egress descriptor to be posted.
+ * @return 0 on success.
+ */
+static inline int gxio_mpipe_equeue_put(gxio_mpipe_equeue_t *equeue,
+ gxio_mpipe_edesc_t edesc)
+{
+ int64_t slot = gxio_mpipe_equeue_reserve_fast(equeue, 1);
+ if (slot < 0)
+ return (int)slot;
+
+ gxio_mpipe_equeue_put_at(equeue, edesc, slot);
+
+ return 0;
+}
+
+/* Ask the mPIPE hardware to egress outstanding packets immediately.
+ *
+ * This call is not necessary, but may slightly reduce overall latency.
+ *
+ * Technically, you should flush all gxio_mpipe_equeue_put_at() writes
+ * to memory before calling this function, to ensure the descriptors
+ * are visible in memory before the mPIPE hardware actually looks for
+ * them. But this should be very rare, and the only side effect would
+ * be increased latency, so it is up to the caller to decide whether
+ * or not to flush memory.
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ */
+static inline void gxio_mpipe_equeue_flush(gxio_mpipe_equeue_t *equeue)
+{
+ /* Use "ring_idx = 0" and "count = 0" to "wake up" the eDMA ring. */
+ MPIPE_EDMA_POST_REGION_VAL_t val = { {0} };
+ /* Flush the write buffers. */
+ __insn_flushwb();
+ __gxio_mmio_write(equeue->dma_queue.post_region_addr, val.word);
+}
+
+/* Determine if a given edesc has been completed.
+ *
+ * Note that this function requires a "completion slot", and thus may
+ * NOT be used with a "slot" from gxio_mpipe_equeue_reserve_fast() or
+ * gxio_mpipe_equeue_try_reserve_fast().
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param completion_slot The completion slot used by the edesc.
+ * @param update If true, and the desc does not appear to have completed
+ * yet, then update any software cache of the hardware completion counter,
+ * and check again. This should normally be true.
+ * @return True iff the given edesc has been completed.
+ */
+static inline int gxio_mpipe_equeue_is_complete(gxio_mpipe_equeue_t *equeue,
+ int64_t completion_slot,
+ int update)
+{
+ return __gxio_dma_queue_is_complete(&equeue->dma_queue,
+ completion_slot, update);
+}
+
+/* Set the snf (store and forward) size for an equeue.
+ *
+ * The snf size for an equeue defaults to 1536, and encodes the size
+ * of the largest packet for which egress is guaranteed to avoid
+ * transmission underruns and/or corrupt checksums under heavy load.
+ *
+ * The snf size affects a global resource pool which cannot support,
+ * for example, all 24 equeues each requesting an snf size of 8K.
+ *
+ * To ensure that jumbo packets can be egressed properly, the snf size
+ * should be set to the size of the largest possible packet, which
+ * will usually be limited by the size of the app's largest buffer.
+ *
+ * This is a convenience wrapper around
+ * gxio_mpipe_config_edma_ring_blks().
+ *
+ * This function should not be called after any egress has been done
+ * on the equeue.
+ *
+ * @param equeue An egress queue initialized via gxio_mpipe_equeue_init().
+ * @param size The snf size, in bytes.
+ * @return Zero on success, negative error otherwise.
+ */
+static inline int gxio_mpipe_equeue_set_snf_size(gxio_mpipe_equeue_t *equeue,
+ size_t size)
+{
+ int blks = (size + 127) / 128;
+ return gxio_mpipe_config_edma_ring_blks(equeue->context, equeue->ering,
+ blks + 1, blks, 1);
+}
+
+/*****************************************************************
+ * Link Management *
+ ******************************************************************/
+
+/*
+ *
+ * Functions for manipulating and sensing the state and configuration
+ * of physical network links.
+ *
+ * @section gxio_mpipe_link_perm Link Permissions
+ *
+ * Opening a link (with gxio_mpipe_link_open()) requests a set of link
+ * permissions, which control what may be done with the link, and potentially
+ * what permissions may be granted to other processes.
+ *
+ * Data permission allows the process to receive packets from the link by
+ * specifying the link's channel number in mPIPE packet distribution rules,
+ * and to send packets to the link by using the link's channel number as
+ * the target for an eDMA ring.
+ *
+ * Stats permission allows the process to retrieve link attributes (such as
+ * the speeds it is capable of running at, or whether it is currently up), and
+ * to read and write certain statistics-related registers in the link's MAC.
+ *
+ * Control permission allows the process to retrieve and modify link attributes
+ * (so that it may, for example, bring the link up and take it down), and
+ * read and write many registers in the link's MAC and PHY.
+ *
+ * Any permission may be requested as shared, which allows other processes
+ * to also request shared permission, or exclusive, which prevents other
+ * processes from requesting it. In keeping with GXIO's typical usage in
+ * an embedded environment, the defaults for all permissions are shared.
+ *
+ * Permissions are granted on a first-come, first-served basis, so if two
+ * applications request an exclusive permission on the same link, the one
+ * to run first will win. Note, however, that some system components, like
+ * the kernel Ethernet driver, may get an opportunity to open links before
+ * any applications run.
+ *
+ * @section gxio_mpipe_link_names Link Names
+ *
+ * Link names are of the form gbe<em>number</em> (for Gigabit Ethernet),
+ * xgbe<em>number</em> (for 10 Gigabit Ethernet), loop<em>number</em> (for
+ * internal mPIPE loopback), or ilk<em>number</em>/<em>channel</em>
+ * (for Interlaken links); for instance, gbe0, xgbe1, loop3, and
+ * ilk0/12 are all possible link names. The correspondence between
+ * the link name and an mPIPE instance number or mPIPE channel number is
+ * system-dependent; all links will not exist on all systems, and the set
+ * of numbers used for a particular link type may not start at zero and may
+ * not be contiguous. Use gxio_mpipe_link_enumerate() to retrieve the set of
+ * links which exist on a system, and always use gxio_mpipe_link_instance()
+ * to determine which mPIPE controls a particular link.
+ *
+ * Note that in some cases, links may share hardware, such as PHYs, or
+ * internal mPIPE buffers; in these cases, only one of the links may be
+ * opened at a time. This is especially common with xgbe and gbe ports,
+ * since each xgbe port uses 4 SERDES lanes, each of which may also be
+ * configured as one gbe port.
+ *
+ * @section gxio_mpipe_link_states Link States
+ *
+ * The mPIPE link management model revolves around three different states,
+ * which are maintained for each link:
+ *
+ * 1. The <em>current</em> link state: is the link up now, and if so, at
+ * what speed?
+ *
+ * 2. The <em>desired</em> link state: what do we want the link state to be?
+ * The system is always working to make this state the current state;
+ * thus, if the desired state is up, and the link is down, we'll be
+ * constantly trying to bring it up, automatically.
+ *
+ * 3. The <em>possible</em> link state: what speeds are valid for this
+ * particular link? Or, in other words, what are the capabilities of
+ * the link hardware?
+ *
+ * These link states are not, strictly speaking, related to application
+ * state; they may be manipulated at any time, whether or not the link
+ * is currently being used for data transfer. However, for convenience,
+ * gxio_mpipe_link_open() and gxio_mpipe_link_close() (or application exit)
+ * can affect the link state. These implicit link management operations
+ * may be modified or disabled by the use of link open flags.
+ *
+ * From an application, you can use gxio_mpipe_link_get_attr()
+ * and gxio_mpipe_link_set_attr() to manipulate the link states.
+ * gxio_mpipe_link_get_attr() with ::GXIO_MPIPE_LINK_POSSIBLE_STATE
+ * gets you the possible link state. gxio_mpipe_link_get_attr() with
+ * ::GXIO_MPIPE_LINK_CURRENT_STATE gets you the current link state.
+ * Finally, gxio_mpipe_link_set_attr() and gxio_mpipe_link_get_attr()
+ * with ::GXIO_MPIPE_LINK_DESIRED_STATE allow you to modify or retrieve
+ * the desired link state.
+ *
+ * If you want to manage a link from a part of your application which isn't
+ * involved in packet processing, you can use the ::GXIO_MPIPE_LINK_NO_DATA
+ * flags on a gxio_mpipe_link_open() call. This opens the link, but does
+ * not request data permission, so it does not conflict with any exclusive
+ * permissions which may be held by other processes. You can then can use
+ * gxio_mpipe_link_get_attr() and gxio_mpipe_link_set_attr() on this link
+ * object to bring up or take down the link.
+ *
+ * Some links support link state bits which support various loopback
+ * modes. ::GXIO_MPIPE_LINK_LOOP_MAC tests datapaths within the Tile
+ * Processor itself; ::GXIO_MPIPE_LINK_LOOP_PHY tests the datapath between
+ * the Tile Processor and the external physical layer interface chip; and
+ * ::GXIO_MPIPE_LINK_LOOP_EXT tests the entire network datapath with the
+ * aid of an external loopback connector. In addition to enabling hardware
+ * testing, such configuration can be useful for software testing, as well.
+ *
+ * When LOOP_MAC or LOOP_PHY is enabled, packets transmitted on a channel
+ * will be received by that channel, instead of being emitted on the
+ * physical link, and packets received on the physical link will be ignored.
+ * Other than that, all standard GXIO operations work as you might expect.
+ * Note that loopback operation requires that the link be brought up using
+ * one or more of the GXIO_MPIPE_LINK_SPEED_xxx link state bits.
+ *
+ * Those familiar with previous versions of the MDE on TILEPro hardware
+ * will notice significant similarities between the NetIO link management
+ * model and the mPIPE link management model. However, the NetIO model
+ * was developed in stages, and some of its features -- for instance,
+ * the default setting of certain flags -- were shaped by the need to be
+ * compatible with previous versions of NetIO. Since the features provided
+ * by the mPIPE hardware and the mPIPE GXIO library are significantly
+ * different than those provided by NetIO, in some cases, we have made
+ * different choices in the mPIPE link management API. Thus, please read
+ * this documentation carefully before assuming that mPIPE link management
+ * operations are exactly equivalent to their NetIO counterparts.
+ */
+
+/* An object used to manage mPIPE link state and resources. */
+typedef struct {
+ /* The overall mPIPE context. */
+ gxio_mpipe_context_t *context;
+
+ /* The channel number used by this link. */
+ uint8_t channel;
+
+ /* The MAC index used by this link. */
+ uint8_t mac;
+} gxio_mpipe_link_t;
+
+/* Translate a link name to the instance number of the mPIPE shim which is
+ * connected to that link. This call does not verify whether the link is
+ * currently available, and does not reserve any link resources;
+ * gxio_mpipe_link_open() must be called to perform those functions.
+ *
+ * Typically applications will call this function to translate a link name
+ * to an mPIPE instance number; call gxio_mpipe_init(), passing it that
+ * instance number, to initialize the mPIPE shim; and then call
+ * gxio_mpipe_link_open(), passing it the same link name plus the mPIPE
+ * context, to configure the link.
+ *
+ * @param link_name Name of the link; see @ref gxio_mpipe_link_names.
+ * @return The mPIPE instance number which is associated with the named
+ * link, or a negative error code (::GXIO_ERR_NO_DEVICE) if the link does
+ * not exist.
+ */
+extern int gxio_mpipe_link_instance(const char *link_name);
+
+/* Retrieve one of this system's legal link names, and its MAC address.
+ *
+ * @param index Link name index. If a system supports N legal link names,
+ * then indices between 0 and N - 1, inclusive, each correspond to one of
+ * those names. Thus, to retrieve all of a system's legal link names,
+ * call this function in a loop, starting with an index of zero, and
+ * incrementing it once per iteration until -1 is returned.
+ * @param link_name Pointer to the buffer which will receive the retrieved
+ * link name. The buffer should contain space for at least
+ * ::GXIO_MPIPE_LINK_NAME_LEN bytes; the returned name, including the
+ * terminating null byte, will be no longer than that.
+ * @param link_name Pointer to the buffer which will receive the retrieved
+ * MAC address. The buffer should contain space for at least 6 bytes.
+ * @return Zero if a link name was successfully retrieved; -1 if one was
+ * not.
+ */
+extern int gxio_mpipe_link_enumerate_mac(int index, char *link_name,
+ uint8_t *mac_addr);
+
+/* Open an mPIPE link.
+ *
+ * A link must be opened before it may be used to send or receive packets,
+ * and before its state may be examined or changed. Depending up on the
+ * link's intended use, one or more link permissions may be requested via
+ * the flags parameter; see @ref gxio_mpipe_link_perm. In addition, flags
+ * may request that the link's state be modified at open time. See @ref
+ * gxio_mpipe_link_states and @ref gxio_mpipe_link_open_flags for more detail.
+ *
+ * @param link A link state object, which will be initialized if this
+ * function completes successfully.
+ * @param context An initialized mPIPE context.
+ * @param link_name Name of the link.
+ * @param flags Zero or more @ref gxio_mpipe_link_open_flags, ORed together.
+ * @return 0 if the link was successfully opened, or a negative error code.
+ *
+ */
+extern int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
+ gxio_mpipe_context_t *context,
+ const char *link_name, unsigned int flags);
+
+/* Close an mPIPE link.
+ *
+ * Closing a link makes it available for use by other processes. Once
+ * a link has been closed, packets may no longer be sent on or received
+ * from the link, and its state may not be examined or changed.
+ *
+ * @param link A link state object, which will no longer be initialized
+ * if this function completes successfully.
+ * @return 0 if the link was successfully closed, or a negative error code.
+ *
+ */
+extern int gxio_mpipe_link_close(gxio_mpipe_link_t *link);
+
+/* Return a link's channel number.
+ *
+ * @param link A properly initialized link state object.
+ * @return The channel number for the link.
+ */
+static inline int gxio_mpipe_link_channel(gxio_mpipe_link_t *link)
+{
+ return link->channel;
+}
+
+/* Set a link attribute.
+ *
+ * @param link A properly initialized link state object.
+ * @param attr An attribute from the set of @ref gxio_mpipe_link_attrs.
+ * @param val New value of the attribute.
+ * @return 0 if the attribute was successfully set, or a negative error
+ * code.
+ */
+extern int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr,
+ int64_t val);
+
+///////////////////////////////////////////////////////////////////
+// Timestamp //
+///////////////////////////////////////////////////////////////////
+
+/* Get the timestamp of mPIPE when this routine is called.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ts A timespec structure to store the current clock.
+ * @return If the call was successful, zero; otherwise, a negative error
+ * code.
+ */
+extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context,
+ struct timespec *ts);
+
+/* Set the timestamp of mPIPE.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ts A timespec structure to store the requested clock.
+ * @return If the call was successful, zero; otherwise, a negative error
+ * code.
+ */
+extern int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context,
+ const struct timespec *ts);
+
+/* Adjust the timestamp of mPIPE.
+ *
+ * @param context An initialized mPIPE context.
+ * @param delta A signed time offset to adjust, in nanoseconds.
+ * The absolute value of this parameter must be less than or
+ * equal to 1000000000.
+ * @return If the call was successful, zero; otherwise, a negative error
+ * code.
+ */
+extern int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context,
+ int64_t delta);
+
+/** Adjust the mPIPE timestamp clock frequency.
+ *
+ * @param context An initialized mPIPE context.
+ * @param ppb A 32-bit signed PPB (Parts Per Billion) value to adjust.
+ * The absolute value of ppb must be less than or equal to 1000000000.
+ * Values less than about 30000 will generally cause a GXIO_ERR_INVAL
+ * return due to the granularity of the hardware that converts reference
+ * clock cycles into seconds and nanoseconds.
+ * @return If the call was successful, zero; otherwise, a negative error
+ * code.
+ */
+extern int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t* context,
+ int32_t ppb);
+
+#endif /* !_GXIO_MPIPE_H_ */
diff --git a/arch/tile/include/gxio/trio.h b/arch/tile/include/gxio/trio.h
new file mode 100644
index 00000000000..df10a662cc2
--- /dev/null
+++ b/arch/tile/include/gxio/trio.h
@@ -0,0 +1,298 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ *
+ * An API for allocating, configuring, and manipulating TRIO hardware
+ * resources
+ */
+
+/*
+ *
+ * The TILE-Gx TRIO shim provides connections to external devices via
+ * PCIe or other transaction IO standards. The gxio_trio_ API,
+ * declared in <gxio/trio.h>, allows applications to allocate and
+ * configure TRIO IO resources like DMA command rings, memory map
+ * windows, and device interrupts. The following sections introduce
+ * the various components of the API. We strongly recommend reading
+ * the TRIO section of the IO Device Guide (UG404) before working with
+ * this API.
+ *
+ * @section trio__ingress TRIO Ingress Hardware Resources
+ *
+ * The TRIO ingress hardware is responsible for examining incoming
+ * PCIe or StreamIO packets and choosing a processing mechanism based
+ * on the packets' bus address. The gxio_trio_ API can be used to
+ * configure different handlers for different ranges of bus address
+ * space. The user can configure "mapped memory" and "scatter queue"
+ * regions to match incoming packets within 4kB-aligned ranges of bus
+ * addresses. Each range specifies a different set of mapping
+ * parameters to be applied when handling the ingress packet. The
+ * following sections describe how to work with MapMem and scatter
+ * queue regions.
+ *
+ * @subsection trio__mapmem TRIO MapMem Regions
+ *
+ * TRIO mapped memory (or MapMem) regions allow the user to map
+ * incoming read and write requests directly to the application's
+ * memory space. MapMem regions are allocated via
+ * gxio_trio_alloc_memory_maps(). Given an integer MapMem number,
+ * applications can use gxio_trio_init_memory_map() to specify the
+ * range of bus addresses that will match the region and the range of
+ * virtual addresses to which those packets will be applied.
+ *
+ * As with many other gxio APIs, the programmer must be sure to
+ * register memory pages that will be used with MapMem regions. Pages
+ * can be registered with TRIO by allocating an ASID (address space
+ * identifier) and then using gxio_trio_register_page() to register up to
+ * 16 pages with the hardware. The initialization functions for
+ * resources that require registered memory (MapMem, scatter queues,
+ * push DMA, and pull DMA) then take an 'asid' parameter in order to
+ * configure which set of registered pages is used by each resource.
+ *
+ * @subsection trio__scatter_queue TRIO Scatter Queues
+ *
+ * The TRIO shim's scatter queue regions allow users to dynamically
+ * map buffers from a large address space into a small range of bus
+ * addresses. This is particularly helpful for PCIe endpoint devices,
+ * where the host generally limits the size of BARs to tens of
+ * megabytes.
+ *
+ * Each scatter queue consists of a memory map region, a queue of
+ * tile-side buffer VAs to be mapped to that region, and a bus-mapped
+ * "doorbell" register that the remote endpoint can write to trigger a
+ * dequeue of the current buffer VA, thus swapping in a new buffer.
+ * The VAs pushed onto a scatter queue must be 4kB aligned, so
+ * applications may need to use higher-level protocols to inform
+ * remote entities that they should apply some additional, sub-4kB
+ * offset when reading or writing the scatter queue region. For more
+ * information, see the IO Device Guide (UG404).
+ *
+ * @section trio__egress TRIO Egress Hardware Resources
+ *
+ * The TRIO shim supports two mechanisms for egress packet generation:
+ * programmed IO (PIO) and push/pull DMA. PIO allows applications to
+ * create MMIO mappings for PCIe or StreamIO address space, such that
+ * the application can generate word-sized read or write transactions
+ * by issuing load or store instructions. Push and pull DMA are tuned
+ * for larger transactions; they use specialized hardware engines to
+ * transfer large blocks of data at line rate.
+ *
+ * @subsection trio__pio TRIO Programmed IO
+ *
+ * Programmed IO allows applications to create MMIO mappings for PCIe
+ * or StreamIO address space. The hardware PIO regions support access
+ * to PCIe configuration, IO, and memory space, but the gxio_trio API
+ * only supports memory space accesses. PIO regions are allocated
+ * with gxio_trio_alloc_pio_regions() and initialized via
+ * gxio_trio_init_pio_region(). Once a region is bound to a range of
+ * bus address via the initialization function, the application can
+ * use gxio_trio_map_pio_region() to create MMIO mappings from its VA
+ * space onto the range of bus addresses supported by the PIO region.
+ *
+ * @subsection trio_dma TRIO Push and Pull DMA
+ *
+ * The TRIO push and pull DMA engines allow users to copy blocks of
+ * data between application memory and the bus. Push DMA generates
+ * write packets that copy from application memory to the bus and pull
+ * DMA generates read packets that copy from the bus into application
+ * memory. The DMA engines are managed via an API that is very
+ * similar to the mPIPE eDMA interface. For a detailed explanation of
+ * the eDMA queue API, see @ref gxio_mpipe_wrappers.
+ *
+ * Push and pull DMA queues are allocated via
+ * gxio_trio_alloc_push_dma_ring() / gxio_trio_alloc_pull_dma_ring().
+ * Once allocated, users generally use a ::gxio_trio_dma_queue_t
+ * object to manage the queue, providing easy wrappers for reserving
+ * command slots in the DMA command ring, filling those slots, and
+ * waiting for commands to complete. DMA queues can be initialized
+ * via gxio_trio_init_push_dma_queue() or
+ * gxio_trio_init_pull_dma_queue().
+ *
+ * See @ref trio/push_dma/app.c for an example of how to use push DMA.
+ *
+ * @section trio_shortcomings Plans for Future API Revisions
+ *
+ * The simulation framework is incomplete. Future features include:
+ *
+ * - Support for reset and deallocation of resources.
+ *
+ * - Support for pull DMA.
+ *
+ * - Support for interrupt regions and user-space interrupt delivery.
+ *
+ * - Support for getting BAR mappings and reserving regions of BAR
+ * address space.
+ */
+#ifndef _GXIO_TRIO_H_
+#define _GXIO_TRIO_H_
+
+#include <linux/types.h>
+
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
+
+#include <arch/trio_constants.h>
+#include <arch/trio.h>
+#include <arch/trio_pcie_intfc.h>
+#include <arch/trio_pcie_rc.h>
+#include <arch/trio_shm.h>
+#include <hv/drv_trio_intf.h>
+#include <hv/iorpc.h>
+
+/* A context object used to manage TRIO hardware resources. */
+typedef struct {
+
+ /* File descriptor for calling up to Linux (and thus the HV). */
+ int fd;
+
+ /* The VA at which the MAC MMIO registers are mapped. */
+ char *mmio_base_mac;
+
+ /* The VA at which the PIO config space are mapped for each PCIe MAC.
+ Gx36 has max 3 PCIe MACs per TRIO shim. */
+ char *mmio_base_pio_cfg[TILEGX_TRIO_PCIES];
+
+#ifdef USE_SHARED_PCIE_CONFIG_REGION
+ /* Index of the shared PIO region for PCI config access. */
+ int pio_cfg_index;
+#else
+ /* Index of the PIO region for PCI config access per MAC. */
+ int pio_cfg_index[TILEGX_TRIO_PCIES];
+#endif
+
+ /* The VA at which the push DMA MMIO registers are mapped. */
+ char *mmio_push_dma[TRIO_NUM_PUSH_DMA_RINGS];
+
+ /* The VA at which the pull DMA MMIO registers are mapped. */
+ char *mmio_pull_dma[TRIO_NUM_PUSH_DMA_RINGS];
+
+ /* Application space ID. */
+ unsigned int asid;
+
+} gxio_trio_context_t;
+
+/* Command descriptor for push or pull DMA. */
+typedef TRIO_DMA_DESC_t gxio_trio_dma_desc_t;
+
+/* A convenient, thread-safe interface to an eDMA ring. */
+typedef struct {
+
+ /* State object for tracking head and tail pointers. */
+ __gxio_dma_queue_t dma_queue;
+
+ /* The ring entries. */
+ gxio_trio_dma_desc_t *dma_descs;
+
+ /* The number of entries minus one. */
+ unsigned long mask_num_entries;
+
+ /* The log2() of the number of entries. */
+ unsigned int log2_num_entries;
+
+} gxio_trio_dma_queue_t;
+
+/* Initialize a TRIO context.
+ *
+ * This function allocates a TRIO "service domain" and maps the MMIO
+ * registers into the the caller's VA space.
+ *
+ * @param trio_index Which TRIO shim; Gx36 must pass 0.
+ * @param context Context object to be initialized.
+ */
+extern int gxio_trio_init(gxio_trio_context_t *context,
+ unsigned int trio_index);
+
+/* This indicates that an ASID hasn't been allocated. */
+#define GXIO_ASID_NULL -1
+
+/* Ordering modes for map memory regions and scatter queue regions. */
+typedef enum gxio_trio_order_mode_e {
+ /* Writes are not ordered. Reads always wait for previous writes. */
+ GXIO_TRIO_ORDER_MODE_UNORDERED =
+ TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED,
+ /* Both writes and reads wait for previous transactions to complete. */
+ GXIO_TRIO_ORDER_MODE_STRICT =
+ TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT,
+ /* Writes are ordered unless the incoming packet has the
+ relaxed-ordering attributes set. */
+ GXIO_TRIO_ORDER_MODE_OBEY_PACKET =
+ TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD
+} gxio_trio_order_mode_t;
+
+/* Initialize a memory mapping region.
+ *
+ * @param context An initialized TRIO context.
+ * @param map A Memory map region allocated by gxio_trio_alloc_memory_map().
+ * @param target_mem VA of backing memory, should be registered via
+ * gxio_trio_register_page() and aligned to 4kB.
+ * @param target_size Length of the memory mapping, must be a multiple
+ * of 4kB.
+ * @param asid ASID to be used for Tile-side address translation.
+ * @param mac MAC number.
+ * @param bus_address Bus address at which the mapping starts.
+ * @param order_mode Memory ordering mode for this mapping.
+ * @return Zero on success, else ::GXIO_TRIO_ERR_BAD_MEMORY_MAP,
+ * GXIO_TRIO_ERR_BAD_ASID, or ::GXIO_TRIO_ERR_BAD_BUS_RANGE.
+ */
+extern int gxio_trio_init_memory_map(gxio_trio_context_t *context,
+ unsigned int map, void *target_mem,
+ size_t target_size, unsigned int asid,
+ unsigned int mac, uint64_t bus_address,
+ gxio_trio_order_mode_t order_mode);
+
+/* Flags that can be passed to resource allocation functions. */
+enum gxio_trio_alloc_flags_e {
+ GXIO_TRIO_ALLOC_FIXED = HV_TRIO_ALLOC_FIXED,
+};
+
+/* Flags that can be passed to memory registration functions. */
+enum gxio_trio_mem_flags_e {
+ /* Do not fill L3 when writing, and invalidate lines upon egress. */
+ GXIO_TRIO_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT,
+
+ /* L3 cache fills should only populate IO cache ways. */
+ GXIO_TRIO_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN,
+};
+
+/* Flag indicating a request generator uses a special traffic
+ class. */
+#define GXIO_TRIO_FLAG_TRAFFIC_CLASS(N) HV_TRIO_FLAG_TC(N)
+
+/* Flag indicating a request generator uses a virtual function
+ number. */
+#define GXIO_TRIO_FLAG_VFUNC(N) HV_TRIO_FLAG_VFUNC(N)
+
+/*****************************************************************
+ * Memory Registration *
+ ******************************************************************/
+
+/* Allocate Application Space Identifiers (ASIDs). Each ASID can
+ * register up to 16 page translations. ASIDs are used by memory map
+ * regions, scatter queues, and DMA queues to translate application
+ * VAs into memory system PAs.
+ *
+ * @param context An initialized TRIO context.
+ * @param count Number of ASIDs required.
+ * @param first Index of first ASID if ::GXIO_TRIO_ALLOC_FIXED flag
+ * is set, otherwise ignored.
+ * @param flags Flag bits, including bits from ::gxio_trio_alloc_flags_e.
+ * @return Index of first ASID, or ::GXIO_TRIO_ERR_NO_ASID if allocation
+ * failed.
+ */
+extern int gxio_trio_alloc_asids(gxio_trio_context_t *context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
+#endif /* ! _GXIO_TRIO_H_ */
diff --git a/arch/tile/include/gxio/uart.h b/arch/tile/include/gxio/uart.h
new file mode 100644
index 00000000000..438ee7e46c7
--- /dev/null
+++ b/arch/tile/include/gxio/uart.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GXIO_UART_H_
+#define _GXIO_UART_H_
+
+#include "common.h"
+
+#include <hv/drv_uart_intf.h>
+#include <hv/iorpc.h>
+
+/*
+ *
+ * An API for manipulating UART interface.
+ */
+
+/*
+ *
+ * The Rshim allows access to the processor's UART interface.
+ */
+
+/* A context object used to manage UART resources. */
+typedef struct {
+
+ /* File descriptor for calling up to the hypervisor. */
+ int fd;
+
+ /* The VA at which our MMIO registers are mapped. */
+ char *mmio_base;
+
+} gxio_uart_context_t;
+
+/* Request UART interrupts.
+ *
+ * Request that interrupts be delivered to a tile when the UART's
+ * Receive FIFO is written, or the Write FIFO is read.
+ *
+ * @param context Pointer to a properly initialized gxio_uart_context_t.
+ * @param bind_cpu_x X coordinate of CPU to which interrupt will be delivered.
+ * @param bind_cpu_y Y coordinate of CPU to which interrupt will be delivered.
+ * @param bind_interrupt IPI interrupt number.
+ * @param bind_event Sub-interrupt event bit number; a negative value can
+ * disable the interrupt.
+ * @return Zero if all of the requested UART events were successfully
+ * configured to interrupt.
+ */
+extern int gxio_uart_cfg_interrupt(gxio_uart_context_t *context,
+ int bind_cpu_x,
+ int bind_cpu_y,
+ int bind_interrupt, int bind_event);
+
+/* Initialize a UART context.
+ *
+ * A properly initialized context must be obtained before any of the other
+ * gxio_uart routines may be used.
+ *
+ * @param context Pointer to a gxio_uart_context_t, which will be initialized
+ * by this routine, if it succeeds.
+ * @param uart_index Index of the UART to use.
+ * @return Zero if the context was successfully initialized, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_uart_init(gxio_uart_context_t *context, int uart_index);
+
+/* Destroy a UART context.
+ *
+ * Once destroyed, a context may not be used with any gxio_uart routines
+ * other than gxio_uart_init(). After this routine returns, no further
+ * interrupts requested on this context will be delivered. The state and
+ * configuration of the pins which had been attached to this context are
+ * unchanged by this operation.
+ *
+ * @param context Pointer to a gxio_uart_context_t.
+ * @return Zero if the context was successfully destroyed, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_uart_destroy(gxio_uart_context_t *context);
+
+/* Write UART register.
+ * @param context Pointer to a gxio_uart_context_t.
+ * @param offset UART register offset.
+ * @param word Data will be wrote to UART reigister.
+ */
+extern void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset,
+ uint64_t word);
+
+/* Read UART register.
+ * @param context Pointer to a gxio_uart_context_t.
+ * @param offset UART register offset.
+ * @return Data read from UART register.
+ */
+extern uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset);
+
+#endif /* _GXIO_UART_H_ */
diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h
new file mode 100644
index 00000000000..93c9636d2dd
--- /dev/null
+++ b/arch/tile/include/gxio/usb_host.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+#ifndef _GXIO_USB_H_
+#define _GXIO_USB_H_
+
+#include <gxio/common.h>
+
+#include <hv/drv_usb_host_intf.h>
+#include <hv/iorpc.h>
+
+/*
+ *
+ * An API for manipulating general-purpose I/O pins.
+ */
+
+/*
+ *
+ * The USB shim allows access to the processor's Universal Serial Bus
+ * connections.
+ */
+
+/* A context object used to manage USB hardware resources. */
+typedef struct {
+
+ /* File descriptor for calling up to the hypervisor. */
+ int fd;
+
+ /* The VA at which our MMIO registers are mapped. */
+ char *mmio_base;
+} gxio_usb_host_context_t;
+
+/* Initialize a USB context.
+ *
+ * A properly initialized context must be obtained before any of the other
+ * gxio_usb_host routines may be used.
+ *
+ * @param context Pointer to a gxio_usb_host_context_t, which will be
+ * initialized by this routine, if it succeeds.
+ * @param usb_index Index of the USB shim to use.
+ * @param is_ehci Nonzero to use the EHCI interface; zero to use the OHCI
+ * intereface.
+ * @return Zero if the context was successfully initialized, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
+ int is_ehci);
+
+/* Destroy a USB context.
+ *
+ * Once destroyed, a context may not be used with any gxio_usb_host routines
+ * other than gxio_usb_host_init(). After this routine returns, no further
+ * interrupts or signals requested on this context will be delivered. The
+ * state and configuration of the pins which had been attached to this
+ * context are unchanged by this operation.
+ *
+ * @param context Pointer to a gxio_usb_host_context_t.
+ * @return Zero if the context was successfully destroyed, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context);
+
+/* Retrieve the address of the shim's MMIO registers.
+ *
+ * @param context Pointer to a properly initialized gxio_usb_host_context_t.
+ * @return The address of the shim's MMIO registers.
+ */
+extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context);
+
+/* Retrieve the length of the shim's MMIO registers.
+ *
+ * @param context Pointer to a properly initialized gxio_usb_host_context_t.
+ * @return The length of the shim's MMIO registers.
+ */
+extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context);
+
+#endif /* _GXIO_USB_H_ */
diff --git a/arch/tile/include/hv/drv_mpipe_intf.h b/arch/tile/include/hv/drv_mpipe_intf.h
new file mode 100644
index 00000000000..c97e416dd96
--- /dev/null
+++ b/arch/tile/include/hv/drv_mpipe_intf.h
@@ -0,0 +1,605 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Interface definitions for the mpipe driver.
+ */
+
+#ifndef _SYS_HV_DRV_MPIPE_INTF_H
+#define _SYS_HV_DRV_MPIPE_INTF_H
+
+#include <arch/mpipe.h>
+#include <arch/mpipe_constants.h>
+
+
+/** Number of mPIPE instances supported */
+#define HV_MPIPE_INSTANCE_MAX (2)
+
+/** Number of buffer stacks (32). */
+#define HV_MPIPE_NUM_BUFFER_STACKS \
+ (MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH)
+
+/** Number of NotifRings (256). */
+#define HV_MPIPE_NUM_NOTIF_RINGS (MPIPE_NUM_NOTIF_RINGS)
+
+/** Number of NotifGroups (32). */
+#define HV_MPIPE_NUM_NOTIF_GROUPS (MPIPE_NUM_NOTIF_GROUPS)
+
+/** Number of buckets (4160). */
+#define HV_MPIPE_NUM_BUCKETS (MPIPE_NUM_BUCKETS)
+
+/** Number of "lo" buckets (4096). */
+#define HV_MPIPE_NUM_LO_BUCKETS 4096
+
+/** Number of "hi" buckets (64). */
+#define HV_MPIPE_NUM_HI_BUCKETS \
+ (HV_MPIPE_NUM_BUCKETS - HV_MPIPE_NUM_LO_BUCKETS)
+
+/** Number of edma rings (24). */
+#define HV_MPIPE_NUM_EDMA_RINGS \
+ (MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH)
+
+
+
+
+/** A flag bit indicating a fixed resource allocation. */
+#define HV_MPIPE_ALLOC_FIXED 0x01
+
+/** Offset for the config register MMIO region. */
+#define HV_MPIPE_CONFIG_MMIO_OFFSET \
+ (MPIPE_MMIO_ADDR__REGION_VAL_CFG << MPIPE_MMIO_ADDR__REGION_SHIFT)
+
+/** Size of the config register MMIO region. */
+#define HV_MPIPE_CONFIG_MMIO_SIZE (64 * 1024)
+
+/** Offset for the config register MMIO region. */
+#define HV_MPIPE_FAST_MMIO_OFFSET \
+ (MPIPE_MMIO_ADDR__REGION_VAL_IDMA << MPIPE_MMIO_ADDR__REGION_SHIFT)
+
+/** Size of the fast register MMIO region (IDMA, EDMA, buffer stack). */
+#define HV_MPIPE_FAST_MMIO_SIZE \
+ ((MPIPE_MMIO_ADDR__REGION_VAL_BSM + 1 - MPIPE_MMIO_ADDR__REGION_VAL_IDMA) \
+ << MPIPE_MMIO_ADDR__REGION_SHIFT)
+
+
+/*
+ * Each type of resource allocation comes in quantized chunks, where
+ * XXX_BITS is the number of chunks, and XXX_RES_PER_BIT is the number
+ * of resources in each chunk.
+ */
+
+/** Number of buffer stack chunks available (32). */
+#define HV_MPIPE_ALLOC_BUFFER_STACKS_BITS \
+ MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH
+
+/** Granularity of buffer stack allocation (1). */
+#define HV_MPIPE_ALLOC_BUFFER_STACKS_RES_PER_BIT \
+ (HV_MPIPE_NUM_BUFFER_STACKS / HV_MPIPE_ALLOC_BUFFER_STACKS_BITS)
+
+/** Number of NotifRing chunks available (32). */
+#define HV_MPIPE_ALLOC_NOTIF_RINGS_BITS \
+ MPIPE_MMIO_INIT_DAT_GX36_0__NOTIF_RING_MASK_WIDTH
+
+/** Granularity of NotifRing allocation (8). */
+#define HV_MPIPE_ALLOC_NOTIF_RINGS_RES_PER_BIT \
+ (HV_MPIPE_NUM_NOTIF_RINGS / HV_MPIPE_ALLOC_NOTIF_RINGS_BITS)
+
+/** Number of NotifGroup chunks available (32). */
+#define HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS \
+ HV_MPIPE_NUM_NOTIF_GROUPS
+
+/** Granularity of NotifGroup allocation (1). */
+#define HV_MPIPE_ALLOC_NOTIF_GROUPS_RES_PER_BIT \
+ (HV_MPIPE_NUM_NOTIF_GROUPS / HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS)
+
+/** Number of lo bucket chunks available (16). */
+#define HV_MPIPE_ALLOC_LO_BUCKETS_BITS \
+ MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_LO_WIDTH
+
+/** Granularity of lo bucket allocation (256). */
+#define HV_MPIPE_ALLOC_LO_BUCKETS_RES_PER_BIT \
+ (HV_MPIPE_NUM_LO_BUCKETS / HV_MPIPE_ALLOC_LO_BUCKETS_BITS)
+
+/** Number of hi bucket chunks available (16). */
+#define HV_MPIPE_ALLOC_HI_BUCKETS_BITS \
+ MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_HI_WIDTH
+
+/** Granularity of hi bucket allocation (4). */
+#define HV_MPIPE_ALLOC_HI_BUCKETS_RES_PER_BIT \
+ (HV_MPIPE_NUM_HI_BUCKETS / HV_MPIPE_ALLOC_HI_BUCKETS_BITS)
+
+/** Number of eDMA ring chunks available (24). */
+#define HV_MPIPE_ALLOC_EDMA_RINGS_BITS \
+ MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH
+
+/** Granularity of eDMA ring allocation (1). */
+#define HV_MPIPE_ALLOC_EDMA_RINGS_RES_PER_BIT \
+ (HV_MPIPE_NUM_EDMA_RINGS / HV_MPIPE_ALLOC_EDMA_RINGS_BITS)
+
+
+
+
+/** Bit vector encoding which NotifRings are in a NotifGroup. */
+typedef struct
+{
+ /** The actual bits. */
+ uint64_t ring_mask[4];
+
+} gxio_mpipe_notif_group_bits_t;
+
+
+/** Another name for MPIPE_LBL_INIT_DAT_BSTS_TBL_t. */
+typedef MPIPE_LBL_INIT_DAT_BSTS_TBL_t gxio_mpipe_bucket_info_t;
+
+
+
+/** Eight buffer stack ids. */
+typedef struct
+{
+ /** The stacks. */
+ uint8_t stacks[8];
+
+} gxio_mpipe_rules_stacks_t;
+
+
+/** A destination mac address. */
+typedef struct
+{
+ /** The octets. */
+ uint8_t octets[6];
+
+} gxio_mpipe_rules_dmac_t;
+
+
+/** A vlan. */
+typedef uint16_t gxio_mpipe_rules_vlan_t;
+
+
+
+/** Maximum number of characters in a link name. */
+#define GXIO_MPIPE_LINK_NAME_LEN 32
+
+
+/** Structure holding a link name. Only needed, and only typedef'ed,
+ * because the IORPC stub generator only handles types which are single
+ * words coming before the parameter name. */
+typedef struct
+{
+ /** The name itself. */
+ char name[GXIO_MPIPE_LINK_NAME_LEN];
+}
+_gxio_mpipe_link_name_t;
+
+/** Maximum number of characters in a symbol name. */
+#define GXIO_MPIPE_SYMBOL_NAME_LEN 128
+
+
+/** Structure holding a symbol name. Only needed, and only typedef'ed,
+ * because the IORPC stub generator only handles types which are single
+ * words coming before the parameter name. */
+typedef struct
+{
+ /** The name itself. */
+ char name[GXIO_MPIPE_SYMBOL_NAME_LEN];
+}
+_gxio_mpipe_symbol_name_t;
+
+
+/** Structure holding a MAC address. */
+typedef struct
+{
+ /** The address. */
+ uint8_t mac[6];
+}
+_gxio_mpipe_link_mac_t;
+
+
+
+/** Request shared data permission -- that is, the ability to send and
+ * receive packets -- on the specified link. Other processes may also
+ * request shared data permission on the same link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
+ * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
+ */
+#define GXIO_MPIPE_LINK_DATA 0x00000001UL
+
+/** Do not request data permission on the specified link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
+ * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
+ */
+#define GXIO_MPIPE_LINK_NO_DATA 0x00000002UL
+
+/** Request exclusive data permission -- that is, the ability to send and
+ * receive packets -- on the specified link. No other processes may
+ * request data permission on this link, and if any process already has
+ * data permission on it, this open will fail.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA,
+ * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed.
+ */
+#define GXIO_MPIPE_LINK_EXCL_DATA 0x00000004UL
+
+/** Request shared stats permission -- that is, the ability to read and write
+ * registers which contain link statistics, and to get link attributes --
+ * on the specified link. Other processes may also request shared stats
+ * permission on the same link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
+ * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
+ */
+#define GXIO_MPIPE_LINK_STATS 0x00000008UL
+
+/** Do not request stats permission on the specified link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
+ * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
+ */
+#define GXIO_MPIPE_LINK_NO_STATS 0x00000010UL
+
+/** Request exclusive stats permission -- that is, the ability to read and
+ * write registers which contain link statistics, and to get link
+ * attributes -- on the specified link. No other processes may request
+ * stats permission on this link, and if any process already
+ * has stats permission on it, this open will fail.
+ *
+ * Requesting exclusive stats permission is normally a very bad idea, since
+ * it prevents programs like mpipe-stat from providing information on this
+ * link. Applications should only do this if they use MAC statistics
+ * registers, and cannot tolerate any of the clear-on-read registers being
+ * reset by other statistics programs.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS,
+ * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed.
+ */
+#define GXIO_MPIPE_LINK_EXCL_STATS 0x00000020UL
+
+/** Request shared control permission -- that is, the ability to modify link
+ * attributes, and read and write MAC and MDIO registers -- on the
+ * specified link. Other processes may also request shared control
+ * permission on the same link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
+ * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
+ */
+#define GXIO_MPIPE_LINK_CTL 0x00000040UL
+
+/** Do not request control permission on the specified link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
+ * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
+ */
+#define GXIO_MPIPE_LINK_NO_CTL 0x00000080UL
+
+/** Request exclusive control permission -- that is, the ability to modify
+ * link attributes, and read and write MAC and MDIO registers -- on the
+ * specified link. No other processes may request control permission on
+ * this link, and if any process already has control permission on it,
+ * this open will fail.
+ *
+ * Requesting exclusive control permission is not always a good idea, since
+ * it prevents programs like mpipe-link from configuring the link.
+ *
+ * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL,
+ * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed.
+ */
+#define GXIO_MPIPE_LINK_EXCL_CTL 0x00000100UL
+
+/** Set the desired state of the link to up, allowing any speeds which are
+ * supported by the link hardware, as part of this open operation; do not
+ * change the desired state of the link when it is closed or the process
+ * exits. No more than one of ::GXIO_MPIPE_LINK_AUTO_UP,
+ * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or
+ * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
+ */
+#define GXIO_MPIPE_LINK_AUTO_UP 0x00000200UL
+
+/** Set the desired state of the link to up, allowing any speeds which are
+ * supported by the link hardware, as part of this open operation; when the
+ * link is closed or this process exits, if no other process has the link
+ * open, set the desired state of the link to down. No more than one of
+ * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN,
+ * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be
+ * specifed in a gxio_mpipe_link_open() call. If none are specified,
+ * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
+ */
+#define GXIO_MPIPE_LINK_AUTO_UPDOWN 0x00000400UL
+
+/** Do not change the desired state of the link as part of the open
+ * operation; when the link is closed or this process exits, if no other
+ * process has the link open, set the desired state of the link to down.
+ * No more than one of ::GXIO_MPIPE_LINK_AUTO_UP,
+ * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or
+ * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open()
+ * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
+ */
+#define GXIO_MPIPE_LINK_AUTO_DOWN 0x00000800UL
+
+/** Do not change the desired state of the link as part of the open
+ * operation; do not change the desired state of the link when it is
+ * closed or the process exits. No more than one of
+ * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN,
+ * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be
+ * specifed in a gxio_mpipe_link_open() call. If none are specified,
+ * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
+ */
+#define GXIO_MPIPE_LINK_AUTO_NONE 0x00001000UL
+
+/** Request that this open call not complete until the network link is up.
+ * The process will wait as long as necessary for this to happen;
+ * applications which wish to abandon waiting for the link after a
+ * specific time period should not specify this flag when opening a link,
+ * but should instead call gxio_mpipe_link_wait() afterward. The link
+ * must be opened with stats permission. Note that this flag by itself
+ * does not change the desired link state; if other open flags or previous
+ * link state changes have not requested a desired state of up, the open
+ * call will never complete. This flag is not available to kernel
+ * clients.
+ */
+#define GXIO_MPIPE_LINK_WAIT 0x00002000UL
+
+
+/*
+ * Note: link attributes must fit in 24 bits, since we use the top 8 bits
+ * of the IORPC offset word for the channel number.
+ */
+
+/** Determine whether jumbo frames may be received. If this attribute's
+ * value value is nonzero, the MAC will accept frames of up to 10240 bytes.
+ * If the value is zero, the MAC will only accept frames of up to 1544
+ * bytes. The default value is zero. */
+#define GXIO_MPIPE_LINK_RECEIVE_JUMBO 0x010000
+
+/** Determine whether to send pause frames on this link if the mPIPE packet
+ * FIFO is nearly full. If the value is zero, pause frames are not sent.
+ * If the value is nonzero, it is the delay value which will be sent in any
+ * pause frames which are output, in units of 512 bit times.
+ *
+ * Bear in mind that in almost all circumstances, the mPIPE packet FIFO
+ * will never fill up, since mPIPE will empty it as fast as or faster than
+ * the incoming data rate, by either delivering or dropping packets. The
+ * only situation in which this is not true is if the memory and cache
+ * subsystem is extremely heavily loaded, and mPIPE cannot perform DMA of
+ * packet data to memory in a timely fashion. In particular, pause frames
+ * will <em>not</em> be sent if packets cannot be delivered because
+ * NotifRings are full, buckets are full, or buffers are not available in
+ * a buffer stack. */
+#define GXIO_MPIPE_LINK_SEND_PAUSE 0x020000
+
+/** Determine whether to suspend output on the receipt of pause frames.
+ * If the value is nonzero, mPIPE shim will suspend output on the link's
+ * channel when a pause frame is received. If the value is zero, pause
+ * frames will be ignored. The default value is zero. */
+#define GXIO_MPIPE_LINK_RECEIVE_PAUSE 0x030000
+
+/** Interface MAC address. The value is a 6-byte MAC address, in the least
+ * significant 48 bits of the value; in other words, an address which would
+ * be printed as '12:34:56:78:90:AB' in IEEE 802 canonical format would
+ * be returned as 0x12345678ab.
+ *
+ * Depending upon the overall system design, a MAC address may or may not
+ * be available for each interface. Note that the interface's MAC address
+ * does not limit the packets received on its channel, although the
+ * classifier's rules could be configured to do that. Similarly, the MAC
+ * address is not used when transmitting packets, although applications
+ * could certainly decide to use the assigned address as a source MAC
+ * address when doing so. This attribute may only be retrieved with
+ * gxio_mpipe_link_get_attr(); it may not be modified.
+ */
+#define GXIO_MPIPE_LINK_MAC 0x040000
+
+/** Determine whether to discard egress packets on link down. If this value
+ * is nonzero, packets sent on this link while the link is down will be
+ * discarded. If this value is zero, no packets will be sent on this link
+ * while it is down. The default value is one. */
+#define GXIO_MPIPE_LINK_DISCARD_IF_DOWN 0x050000
+
+/** Possible link state. The value is a combination of link state flags,
+ * ORed together, that indicate link modes which are actually supported by
+ * the hardware. This attribute may only be retrieved with
+ * gxio_mpipe_link_get_attr(); it may not be modified. */
+#define GXIO_MPIPE_LINK_POSSIBLE_STATE 0x060000
+
+/** Current link state. The value is a combination of link state flags,
+ * ORed together, that indicate the current state of the hardware. If the
+ * link is down, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will be zero;
+ * if the link is up, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will
+ * result in exactly one of the speed values, indicating the current speed.
+ * This attribute may only be retrieved with gxio_mpipe_link_get_attr(); it
+ * may not be modified. */
+#define GXIO_MPIPE_LINK_CURRENT_STATE 0x070000
+
+/** Desired link state. The value is a conbination of flags, which specify
+ * the desired state for the link. With gxio_mpipe_link_set_attr(), this
+ * will, in the background, attempt to bring up the link using whichever of
+ * the requested flags are reasonable, or take down the link if the flags
+ * are zero. The actual link up or down operation may happen after this
+ * call completes. If the link state changes in the future, the system
+ * will continue to try to get back to the desired link state; for
+ * instance, if the link is brought up successfully, and then the network
+ * cable is disconnected, the link will go down. However, the desired
+ * state of the link is still up, so if the cable is reconnected, the link
+ * will be brought up again.
+ *
+ * With gxio_mpipe_link_set_attr(), this will indicate the desired state
+ * for the link, as set with a previous gxio_mpipe_link_set_attr() call,
+ * or implicitly by a gxio_mpipe_link_open() or link close operation.
+ * This may not reflect the current state of the link; to get that, use
+ * ::GXIO_MPIPE_LINK_CURRENT_STATE.
+ */
+#define GXIO_MPIPE_LINK_DESIRED_STATE 0x080000
+
+
+
+/** Link can run, should run, or is running at 10 Mbps. */
+#define GXIO_MPIPE_LINK_10M 0x0000000000000001UL
+
+/** Link can run, should run, or is running at 100 Mbps. */
+#define GXIO_MPIPE_LINK_100M 0x0000000000000002UL
+
+/** Link can run, should run, or is running at 1 Gbps. */
+#define GXIO_MPIPE_LINK_1G 0x0000000000000004UL
+
+/** Link can run, should run, or is running at 10 Gbps. */
+#define GXIO_MPIPE_LINK_10G 0x0000000000000008UL
+
+/** Link can run, should run, or is running at 20 Gbps. */
+#define GXIO_MPIPE_LINK_20G 0x0000000000000010UL
+
+/** Link can run, should run, or is running at 25 Gbps. */
+#define GXIO_MPIPE_LINK_25G 0x0000000000000020UL
+
+/** Link can run, should run, or is running at 50 Gbps. */
+#define GXIO_MPIPE_LINK_50G 0x0000000000000040UL
+
+/** Link should run at the highest speed supported by the link and by
+ * the device connected to the link. Only usable as a value for
+ * the link's desired state; never returned as a value for the current
+ * or possible states. */
+#define GXIO_MPIPE_LINK_ANYSPEED 0x0000000000000800UL
+
+/** All legal link speeds. This value is provided for use in extracting
+ * the speed-related subset of the link state flags; it is not intended
+ * to be set directly as a value for one of the GXIO_MPIPE_LINK_xxx_STATE
+ * attributes. A link is up or is requested to be up if its current or
+ * desired state, respectively, ANDED with this value, is nonzero. */
+#define GXIO_MPIPE_LINK_SPEED_MASK 0x0000000000000FFFUL
+
+/** Link can run, should run, or is running in MAC loopback mode. This
+ * loops transmitted packets back to the receiver, inside the Tile
+ * Processor. */
+#define GXIO_MPIPE_LINK_LOOP_MAC 0x0000000000001000UL
+
+/** Link can run, should run, or is running in PHY loopback mode. This
+ * loops transmitted packets back to the receiver, inside the external
+ * PHY chip. */
+#define GXIO_MPIPE_LINK_LOOP_PHY 0x0000000000002000UL
+
+/** Link can run, should run, or is running in external loopback mode.
+ * This requires that an external loopback plug be installed on the
+ * Ethernet port. Note that only some links require that this be
+ * configured via the gxio_mpipe_link routines; other links can do
+ * external loopack with the plug and no special configuration. */
+#define GXIO_MPIPE_LINK_LOOP_EXT 0x0000000000004000UL
+
+/** All legal loopback types. */
+#define GXIO_MPIPE_LINK_LOOP_MASK 0x000000000000F000UL
+
+/** Link can run, should run, or is running in full-duplex mode.
+ * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are
+ * specified in a set of desired state flags, both are assumed. */
+#define GXIO_MPIPE_LINK_FDX 0x0000000000010000UL
+
+/** Link can run, should run, or is running in half-duplex mode.
+ * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are
+ * specified in a set of desired state flags, both are assumed. */
+#define GXIO_MPIPE_LINK_HDX 0x0000000000020000UL
+
+
+/** An individual rule. */
+typedef struct
+{
+ /** The total size. */
+ uint16_t size;
+
+ /** The priority. */
+ int16_t priority;
+
+ /** The "headroom" in each buffer. */
+ uint8_t headroom;
+
+ /** The "tailroom" in each buffer. */
+ uint8_t tailroom;
+
+ /** The "capacity" of the largest buffer. */
+ uint16_t capacity;
+
+ /** The mask for converting a flow hash into a bucket. */
+ uint16_t bucket_mask;
+
+ /** The offset for converting a flow hash into a bucket. */
+ uint16_t bucket_first;
+
+ /** The buffer stack ids. */
+ gxio_mpipe_rules_stacks_t stacks;
+
+ /** The actual channels. */
+ uint32_t channel_bits;
+
+ /** The number of dmacs. */
+ uint16_t num_dmacs;
+
+ /** The number of vlans. */
+ uint16_t num_vlans;
+
+ /** The actual dmacs and vlans. */
+ uint8_t dmacs_and_vlans[];
+
+} gxio_mpipe_rules_rule_t;
+
+
+/** A list of classifier rules. */
+typedef struct
+{
+ /** The offset to the end of the current rule. */
+ uint16_t tail;
+
+ /** The offset to the start of the current rule. */
+ uint16_t head;
+
+ /** The actual rules. */
+ uint8_t rules[4096 - 4];
+
+} gxio_mpipe_rules_list_t;
+
+
+
+
+/** mPIPE statistics structure. These counters include all relevant
+ * events occurring on all links within the mPIPE shim. */
+typedef struct
+{
+ /** Number of ingress packets dropped for any reason. */
+ uint64_t ingress_drops;
+ /** Number of ingress packets dropped because a buffer stack was empty. */
+ uint64_t ingress_drops_no_buf;
+ /** Number of ingress packets dropped or truncated due to lack of space in
+ * the iPkt buffer. */
+ uint64_t ingress_drops_ipkt;
+ /** Number of ingress packets dropped by the classifier or load balancer */
+ uint64_t ingress_drops_cls_lb;
+ /** Total number of ingress packets. */
+ uint64_t ingress_packets;
+ /** Total number of egress packets. */
+ uint64_t egress_packets;
+ /** Total number of ingress bytes. */
+ uint64_t ingress_bytes;
+ /** Total number of egress bytes. */
+ uint64_t egress_bytes;
+}
+gxio_mpipe_stats_t;
+
+
+#endif /* _SYS_HV_DRV_MPIPE_INTF_H */
diff --git a/arch/tile/include/hv/drv_trio_intf.h b/arch/tile/include/hv/drv_trio_intf.h
new file mode 100644
index 00000000000..237e04dee66
--- /dev/null
+++ b/arch/tile/include/hv/drv_trio_intf.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Interface definitions for the trio driver.
+ */
+
+#ifndef _SYS_HV_DRV_TRIO_INTF_H
+#define _SYS_HV_DRV_TRIO_INTF_H
+
+#include <arch/trio.h>
+
+/** The vendor ID for all Tilera processors. */
+#define TILERA_VENDOR_ID 0x1a41
+
+/** The device ID for the Gx36 processor. */
+#define TILERA_GX36_DEV_ID 0x0200
+
+/** Device ID for our internal bridge when running as RC. */
+#define TILERA_GX36_RC_DEV_ID 0x2000
+
+/** Maximum number of TRIO interfaces. */
+#define TILEGX_NUM_TRIO 2
+
+/** Gx36 has max 3 PCIe MACs per TRIO interface. */
+#define TILEGX_TRIO_PCIES 3
+
+/** Specify port properties for a PCIe MAC. */
+struct pcie_port_property
+{
+ /** If true, the link can be configured in PCIe root complex mode. */
+ uint8_t allow_rc: 1;
+
+ /** If true, the link can be configured in PCIe endpoint mode. */
+ uint8_t allow_ep: 1;
+
+ /** If true, the link can be configured in StreamIO mode. */
+ uint8_t allow_sio: 1;
+
+ /** If true, the link is allowed to support 1-lane operation. Software
+ * will not consider it an error if the link comes up as a x1 link. */
+ uint8_t allow_x1: 1;
+
+ /** If true, the link is allowed to support 2-lane operation. Software
+ * will not consider it an error if the link comes up as a x2 link. */
+ uint8_t allow_x2: 1;
+
+ /** If true, the link is allowed to support 4-lane operation. Software
+ * will not consider it an error if the link comes up as a x4 link. */
+ uint8_t allow_x4: 1;
+
+ /** If true, the link is allowed to support 8-lane operation. Software
+ * will not consider it an error if the link comes up as a x8 link. */
+ uint8_t allow_x8: 1;
+
+ /** If true, this link is connected to a device which may or may not
+ * be present. */
+ uint8_t removable: 1;
+
+};
+
+/** Configurations can be issued to configure a char stream interrupt. */
+typedef enum pcie_stream_intr_config_sel_e
+{
+ /** Interrupt configuration for memory map regions. */
+ MEM_MAP_SEL,
+
+ /** Interrupt configuration for push DMAs. */
+ PUSH_DMA_SEL,
+
+ /** Interrupt configuration for pull DMAs. */
+ PULL_DMA_SEL,
+}
+pcie_stream_intr_config_sel_t;
+
+
+/** The mmap file offset (PA) of the TRIO config region. */
+#define HV_TRIO_CONFIG_OFFSET \
+ ((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_CFG << \
+ TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT)
+
+/** The maximum size of the TRIO config region. */
+#define HV_TRIO_CONFIG_SIZE \
+ (1ULL << TRIO_CFG_REGION_ADDR__REGION_SHIFT)
+
+/** Size of the config region mapped into client. We can't use
+ * TRIO_MMIO_ADDRESS_SPACE__OFFSET_WIDTH because it
+ * will require the kernel to allocate 4GB VA space
+ * from the VMALLOC region which has a total range
+ * of 4GB.
+ */
+#define HV_TRIO_CONFIG_IOREMAP_SIZE \
+ ((uint64_t) 1 << TRIO_CFG_REGION_ADDR__PROT_SHIFT)
+
+/** The mmap file offset (PA) of a scatter queue region. */
+#define HV_TRIO_SQ_OFFSET(queue) \
+ (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_SQ << \
+ TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \
+ ((queue) << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT))
+
+/** The maximum size of a scatter queue region. */
+#define HV_TRIO_SQ_SIZE \
+ (1ULL << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT)
+
+
+/** The "hardware MMIO region" of the first PIO region. */
+#define HV_TRIO_FIRST_PIO_REGION 8
+
+/** The mmap file offset (PA) of a PIO region. */
+#define HV_TRIO_PIO_OFFSET(region) \
+ (((unsigned long long)(region) + HV_TRIO_FIRST_PIO_REGION) \
+ << TRIO_PIO_REGIONS_ADDR__REGION_SHIFT)
+
+/** The maximum size of a PIO region. */
+#define HV_TRIO_PIO_SIZE (1ULL << TRIO_PIO_REGIONS_ADDR__ADDR_WIDTH)
+
+
+/** The mmap file offset (PA) of a push DMA region. */
+#define HV_TRIO_PUSH_DMA_OFFSET(ring) \
+ (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PUSH_DMA << \
+ TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \
+ ((ring) << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT))
+
+/** The mmap file offset (PA) of a pull DMA region. */
+#define HV_TRIO_PULL_DMA_OFFSET(ring) \
+ (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PULL_DMA << \
+ TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \
+ ((ring) << TRIO_PULL_DMA_REGION_ADDR__RING_SEL_SHIFT))
+
+/** The maximum size of a DMA region. */
+#define HV_TRIO_DMA_REGION_SIZE \
+ (1ULL << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT)
+
+
+/** The mmap file offset (PA) of a Mem-Map interrupt region. */
+#define HV_TRIO_MEM_MAP_INTR_OFFSET(map) \
+ (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_MEM << \
+ TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \
+ ((map) << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT))
+
+/** The maximum size of a Mem-Map interrupt region. */
+#define HV_TRIO_MEM_MAP_INTR_SIZE \
+ (1ULL << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT)
+
+
+/** A flag bit indicating a fixed resource allocation. */
+#define HV_TRIO_ALLOC_FIXED 0x01
+
+/** TRIO requires that all mappings have 4kB aligned start addresses. */
+#define HV_TRIO_PAGE_SHIFT 12
+
+/** TRIO requires that all mappings have 4kB aligned start addresses. */
+#define HV_TRIO_PAGE_SIZE (1ull << HV_TRIO_PAGE_SHIFT)
+
+
+/* Specify all PCIe port properties for a TRIO. */
+struct pcie_trio_ports_property
+{
+ struct pcie_port_property ports[TILEGX_TRIO_PCIES];
+
+ /** Set if this TRIO belongs to a Gx72 device. */
+ uint8_t is_gx72;
+};
+
+/* Flags indicating traffic class. */
+#define HV_TRIO_FLAG_TC_SHIFT 4
+#define HV_TRIO_FLAG_TC_RMASK 0xf
+#define HV_TRIO_FLAG_TC(N) \
+ ((((N) & HV_TRIO_FLAG_TC_RMASK) + 1) << HV_TRIO_FLAG_TC_SHIFT)
+
+/* Flags indicating virtual functions. */
+#define HV_TRIO_FLAG_VFUNC_SHIFT 8
+#define HV_TRIO_FLAG_VFUNC_RMASK 0xff
+#define HV_TRIO_FLAG_VFUNC(N) \
+ ((((N) & HV_TRIO_FLAG_VFUNC_RMASK) + 1) << HV_TRIO_FLAG_VFUNC_SHIFT)
+
+
+/* Flag indicating an ordered PIO region. */
+#define HV_TRIO_PIO_FLAG_ORDERED (1 << 16)
+
+/* Flags indicating special types of PIO regions. */
+#define HV_TRIO_PIO_FLAG_SPACE_SHIFT 17
+#define HV_TRIO_PIO_FLAG_SPACE_MASK (0x3 << HV_TRIO_PIO_FLAG_SPACE_SHIFT)
+#define HV_TRIO_PIO_FLAG_CONFIG_SPACE (0x1 << HV_TRIO_PIO_FLAG_SPACE_SHIFT)
+#define HV_TRIO_PIO_FLAG_IO_SPACE (0x2 << HV_TRIO_PIO_FLAG_SPACE_SHIFT)
+
+
+#endif /* _SYS_HV_DRV_TRIO_INTF_H */
diff --git a/arch/tile/include/hv/drv_uart_intf.h b/arch/tile/include/hv/drv_uart_intf.h
new file mode 100644
index 00000000000..f5379e2404f
--- /dev/null
+++ b/arch/tile/include/hv/drv_uart_intf.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Interface definitions for the UART driver.
+ */
+
+#ifndef _SYS_HV_DRV_UART_INTF_H
+#define _SYS_HV_DRV_UART_INTF_H
+
+#include <arch/uart.h>
+
+/** Number of UART ports supported. */
+#define TILEGX_UART_NR 2
+
+/** The mmap file offset (PA) of the UART MMIO region. */
+#define HV_UART_MMIO_OFFSET 0
+
+/** The maximum size of the UARTs MMIO region (64K Bytes). */
+#define HV_UART_MMIO_SIZE (1UL << 16)
+
+#endif /* _SYS_HV_DRV_UART_INTF_H */
diff --git a/arch/tile/include/hv/drv_usb_host_intf.h b/arch/tile/include/hv/drv_usb_host_intf.h
new file mode 100644
index 00000000000..24ce774a3f1
--- /dev/null
+++ b/arch/tile/include/hv/drv_usb_host_intf.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Interface definitions for the USB host driver.
+ */
+
+#ifndef _SYS_HV_DRV_USB_HOST_INTF_H
+#define _SYS_HV_DRV_USB_HOST_INTF_H
+
+#include <arch/usb_host.h>
+
+
+/** Offset for the EHCI register MMIO region. */
+#define HV_USB_HOST_MMIO_OFFSET_EHCI ((uint64_t) USB_HOST_HCCAPBASE_REG)
+
+/** Offset for the OHCI register MMIO region. */
+#define HV_USB_HOST_MMIO_OFFSET_OHCI ((uint64_t) USB_HOST_OHCD_HC_REVISION_REG)
+
+/** Size of the register MMIO region. This turns out to be the same for
+ * both EHCI and OHCI. */
+#define HV_USB_HOST_MMIO_SIZE ((uint64_t) 0x1000)
+
+/** The number of service domains supported by the USB host shim. */
+#define HV_USB_HOST_NUM_SVC_DOM 1
+
+
+#endif /* _SYS_HV_DRV_USB_HOST_INTF_H */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
index f13188ac281..2a20b266d94 100644
--- a/arch/tile/include/hv/drv_xgbe_intf.h
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -460,7 +460,7 @@ typedef void* lepp_comp_t;
* linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
* our page size of exactly 65536. We add one for a "body" fragment.
*/
-#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1)
+#define LEPP_MAX_FRAGS (65536 / HV_DEFAULT_PAGE_SIZE_SMALL + 2 + 1)
/** Total number of bytes needed for an lepp_tso_cmd_t. */
#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 72ec1e972f1..dfcdeb61ba3 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -17,8 +17,8 @@
* The hypervisor's public API.
*/
-#ifndef _TILE_HV_H
-#define _TILE_HV_H
+#ifndef _HV_HV_H
+#define _HV_HV_H
#include <arch/chip.h>
@@ -42,25 +42,45 @@
*/
#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN)
-/** The log2 of the size of small pages, in bytes. This value should
- * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+/** The log2 of the initial size of small pages, in bytes.
+ * See HV_DEFAULT_PAGE_SIZE_SMALL.
*/
-#define HV_LOG2_PAGE_SIZE_SMALL 16
+#define HV_LOG2_DEFAULT_PAGE_SIZE_SMALL 16
-/** The size of small pages, in bytes. This value should be verified
+/** The initial size of small pages, in bytes. This value should be verified
* at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
+ * It may also be modified when installing a new context.
*/
-#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL)
+#define HV_DEFAULT_PAGE_SIZE_SMALL \
+ (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_SMALL)
-/** The log2 of the size of large pages, in bytes. This value should be
- * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+/** The log2 of the initial size of large pages, in bytes.
+ * See HV_DEFAULT_PAGE_SIZE_LARGE.
*/
-#define HV_LOG2_PAGE_SIZE_LARGE 24
+#define HV_LOG2_DEFAULT_PAGE_SIZE_LARGE 24
-/** The size of large pages, in bytes. This value should be verified
+/** The initial size of large pages, in bytes. This value should be verified
* at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
+ * It may also be modified when installing a new context.
*/
-#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE)
+#define HV_DEFAULT_PAGE_SIZE_LARGE \
+ (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE)
+
+#if CHIP_VA_WIDTH() > 32
+
+/** The log2 of the initial size of jumbo pages, in bytes.
+ * See HV_DEFAULT_PAGE_SIZE_JUMBO.
+ */
+#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32
+
+/** The initial size of jumbo pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO).
+ * It may also be modified when installing a new context.
+ */
+#define HV_DEFAULT_PAGE_SIZE_JUMBO \
+ (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO)
+
+#endif
/** The log2 of the granularity at which page tables must be aligned;
* in other words, the CPA for a page table must have this many zero
@@ -87,7 +107,22 @@
#define HV_DISPATCH_ENTRY_SIZE 32
/** Version of the hypervisor interface defined by this file */
-#define _HV_VERSION 11
+#define _HV_VERSION 13
+
+/** Last version of the hypervisor interface with old hv_init() ABI.
+ *
+ * The change from version 12 to version 13 corresponds to launching
+ * the client by default at PL2 instead of PL1 (corresponding to the
+ * hv itself running at PL3 instead of PL2). To make this explicit,
+ * the hv_init() API was also extended so the client can report its
+ * desired PL, resulting in a more helpful failure diagnostic. If you
+ * call hv_init() with _HV_VERSION_OLD_HV_INIT and omit the client_pl
+ * argument, the hypervisor will assume client_pl = 1.
+ *
+ * Note that this is a deprecated solution and we do not expect to
+ * support clients of the Tilera hypervisor running at PL1 indefinitely.
+ */
+#define _HV_VERSION_OLD_HV_INIT 12
/* Index into hypervisor interface dispatch code blocks.
*
@@ -280,8 +315,14 @@
#define HV_DISPATCH_GET_IPI_PTE 56
#endif
+/** hv_set_pte_super_shift */
+#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57
+
+/** hv_console_set_ipi */
+#define HV_DISPATCH_CONSOLE_SET_IPI 63
+
/** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 57
+#define _HV_DISPATCH_END 64
#ifndef __ASSEMBLER__
@@ -354,7 +395,11 @@ typedef int HV_Errno;
#ifndef __ASSEMBLER__
/** Pass HV_VERSION to hv_init to request this version of the interface. */
-typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
+typedef enum {
+ HV_VERSION = _HV_VERSION,
+ HV_VERSION_OLD_HV_INIT = _HV_VERSION_OLD_HV_INIT,
+
+} HV_VersionNumber;
/** Initializes the hypervisor.
*
@@ -362,9 +407,11 @@ typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber;
* that this program expects, typically HV_VERSION.
* @param chip_num Architecture number of the chip the client was built for.
* @param chip_rev_num Revision number of the chip the client was built for.
+ * @param client_pl Privilege level the client is built for
+ * (not required if interface_version_number == HV_VERSION_OLD_HV_INIT).
*/
void hv_init(HV_VersionNumber interface_version_number,
- int chip_num, int chip_rev_num);
+ int chip_num, int chip_rev_num, int client_pl);
/** Queries we can make for hv_sysconf().
@@ -401,7 +448,18 @@ typedef enum {
* that the temperature has hit an upper limit and is no longer being
* accurately tracked.
*/
- HV_SYSCONF_BOARD_TEMP = 6
+ HV_SYSCONF_BOARD_TEMP = 6,
+
+ /** Legal page size bitmask for hv_install_context().
+ * For example, if 16KB and 64KB small pages are supported,
+ * it would return "HV_CTX_PG_SM_16K | HV_CTX_PG_SM_64K".
+ */
+ HV_SYSCONF_VALID_PAGE_SIZES = 7,
+
+ /** The size of jumbo pages, in bytes.
+ * If no jumbo pages are available, zero will be returned.
+ */
+ HV_SYSCONF_PAGE_SIZE_JUMBO = 8,
} HV_SysconfQuery;
@@ -474,14 +532,36 @@ typedef enum {
HV_CONFSTR_SWITCH_CONTROL = 14,
/** Chip revision level. */
- HV_CONFSTR_CHIP_REV = 15
+ HV_CONFSTR_CHIP_REV = 15,
+
+ /** CPU module part number. */
+ HV_CONFSTR_CPUMOD_PART_NUM = 16,
+
+ /** CPU module serial number. */
+ HV_CONFSTR_CPUMOD_SERIAL_NUM = 17,
+
+ /** CPU module revision level. */
+ HV_CONFSTR_CPUMOD_REV = 18,
+
+ /** Human-readable CPU module description. */
+ HV_CONFSTR_CPUMOD_DESC = 19,
+
+ /** Per-tile hypervisor statistics. When this identifier is specified,
+ * the hv_confstr call takes two extra arguments. The first is the
+ * HV_XY_TO_LOTAR of the target tile's coordinates. The second is
+ * a flag word. The only current flag is the lowest bit, which means
+ * "zero out the stats instead of retrieving them"; in this case the
+ * buffer and buffer length are ignored. */
+ HV_CONFSTR_HV_STATS = 20
} HV_ConfstrQuery;
/** Query a configuration string from the hypervisor.
*
* @param query Identifier for the specific string to be retrieved
- * (HV_CONFSTR_xxx).
+ * (HV_CONFSTR_xxx). Some strings may require or permit extra
+ * arguments to be appended which select specific objects to be
+ * described; see the string descriptions above.
* @param buf Buffer in which to place the string.
* @param len Length of the buffer.
* @return If query is valid, then the length of the corresponding string,
@@ -489,7 +569,7 @@ typedef enum {
* was truncated. If query is invalid, HV_EINVAL. If the specified
* buffer is not writable by the client, HV_EFAULT.
*/
-int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
+int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len, ...);
/** Tile coordinate */
typedef struct
@@ -513,6 +593,30 @@ typedef struct
*/
int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte);
+/** Configure the console interrupt.
+ *
+ * When the console client interrupt is enabled, the hypervisor will
+ * deliver the specified IPI to the client in the following situations:
+ *
+ * - The console has at least one character available for input.
+ *
+ * - The console can accept new characters for output, and the last call
+ * to hv_console_write() did not write all of the characters requested
+ * by the client.
+ *
+ * Note that in some system configurations, console interrupt will not
+ * be available; clients should be prepared for this routine to fail and
+ * to fall back to periodic console polling in that case.
+ *
+ * @param ipi Index of the IPI register which will receive the interrupt.
+ * @param event IPI event number for console interrupt. If less than 0,
+ * disable the console IPI interrupt.
+ * @param coord Tile to be targeted for console interrupt.
+ * @return 0 on success, otherwise, HV_EINVAL if illegal parameter,
+ * HV_ENOTSUP if console interrupt are not available.
+ */
+int hv_console_set_ipi(int ipi, int event, HV_Coord coord);
+
#else /* !CHIP_HAS_IPI() */
/** A set of interrupts. */
@@ -649,6 +753,12 @@ void hv_set_rtc(HV_RTCTime time);
* new page table does not need to contain any mapping for the
* hv_install_context address itself.
*
+ * At most one HV_CTX_PG_SM_* flag may be specified in "flags";
+ * if multiple flags are specified, HV_EINVAL is returned.
+ * Specifying none of the flags results in using the default page size.
+ * All cores participating in a given client must request the same
+ * page size, or the results are undefined.
+ *
* @param page_table Root of the page table.
* @param access PTE providing info on how to read the page table. This
* value must be consistent between multiple tiles sharing a page table,
@@ -667,8 +777,36 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from
PL0. */
+#define HV_CTX_PG_SM_4K 0x10 /**< Use 4K small pages, if available. */
+#define HV_CTX_PG_SM_16K 0x20 /**< Use 16K small pages, if available. */
+#define HV_CTX_PG_SM_64K 0x40 /**< Use 64K small pages, if available. */
+#define HV_CTX_PG_SM_MASK 0xf0 /**< Mask of all possible small pages. */
+
#ifndef __ASSEMBLER__
+
+/** Set the number of pages ganged together by HV_PTE_SUPER at a
+ * particular level of the page table.
+ *
+ * The current TILE-Gx hardware only supports powers of four
+ * (i.e. log2_count must be a multiple of two), and the requested
+ * "super" page size must be less than the span of the next level in
+ * the page table. The largest size that can be requested is 64GB.
+ *
+ * The shift value is initially "0" for all page table levels,
+ * indicating that the HV_PTE_SUPER bit is effectively ignored.
+ *
+ * If you change the count from one non-zero value to another, the
+ * hypervisor will flush the entire TLB and TSB to avoid confusion.
+ *
+ * @param level Page table level (0, 1, or 2)
+ * @param log2_count Base-2 log of the number of pages to gang together,
+ * i.e. how much to shift left the base page size for the super page size.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_set_pte_super_shift(int level, int log2_count);
+
+
/** Value returned from hv_inquire_context(). */
typedef struct
{
@@ -1238,11 +1376,14 @@ HV_Errno hv_set_command_line(HV_VirtAddr buf, int length);
* with the existing priority pages) or "red/black" (if they don't).
* The bitmask provides information on which parts of the cache
* have been used for pinned pages so far on this tile; if (1 << N)
- * appears in the bitmask, that indicates that a page has been marked
- * "priority" whose PFN equals N, mod 8.
+ * appears in the bitmask, that indicates that a 4KB region of the
+ * cache starting at (N * 4KB) is in use by a "priority" page.
+ * The portion of cache used by a particular page can be computed
+ * by taking the page's PA, modulo CHIP_L2_CACHE_SIZE(), and setting
+ * all the "4KB" bits corresponding to the actual page size.
* @param bitmask A bitmap of priority page set values
*/
-void hv_set_caching(unsigned int bitmask);
+void hv_set_caching(unsigned long bitmask);
/** Zero out a specified number of pages.
@@ -1851,12 +1992,12 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */
#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */
#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */
- /* Bits 13-15 are reserved for
+ /* Bits 13-14 are reserved for
future use. */
+#define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */
#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */
#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */
- /* Bit 19 is reserved for
- future use. */
+#define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */
#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits
of word */
#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */
@@ -1869,15 +2010,6 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
of word */
#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */
-/** Position of the PFN field within the PTE (subset of the PTFN). */
-#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \
- HV_LOG2_PAGE_TABLE_ALIGN))
-
-/** Length of the PFN field within the PTE (subset of the PTFN). */
-#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \
- (HV_LOG2_PAGE_SIZE_SMALL - \
- HV_LOG2_PAGE_TABLE_ALIGN))
-
/*
* Legal values for the PTE's mode field
*/
@@ -1957,7 +2089,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
/** Does this PTE map a page?
*
- * If this bit is set in the level-1 page table, the entry should be
+ * If this bit is set in a level-0 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a jumbo page.
+ *
+ * If this bit is set in a level-1 page table, the entry should be
* interpreted as a level-2 page table entry mapping a large page.
*
* This bit should not be modified by the client while PRESENT is set, as
@@ -1967,6 +2102,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
*/
#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
+/** Does this PTE implicitly reference multiple pages?
+ *
+ * If this bit is set in the page table (either in the level-2 page table,
+ * or in a higher level page table in conjunction with the PAGE bit)
+ * then the PTE specifies a range of contiguous pages, not a single page.
+ * The hv_set_pte_super_shift() allows you to specify the count for
+ * each level of the page table.
+ *
+ * Note: this bit is not supported on TILEPro systems.
+ */
+#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER)
+
/** Is this a global (non-ASID) mapping?
*
* If this bit is set, the translations established by this PTE will
@@ -2046,6 +2193,13 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
*/
#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1)
+/** Client-private bit in PTE.
+ *
+ * This bit is guaranteed not to be inspected or modified by the
+ * hypervisor.
+ */
+#define HV_PTE_CLIENT2 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT2)
+
/** Non-coherent (NC) bit in PTE.
*
* If this bit is set, the mapping that is set up will be non-coherent
@@ -2178,8 +2332,10 @@ hv_pte_clear_##name(HV_PTE pte) \
*/
_HV_BIT(present, PRESENT)
_HV_BIT(page, PAGE)
+_HV_BIT(super, SUPER)
_HV_BIT(client0, CLIENT0)
_HV_BIT(client1, CLIENT1)
+_HV_BIT(client2, CLIENT2)
_HV_BIT(migrating, MIGRATING)
_HV_BIT(nc, NC)
_HV_BIT(readable, READABLE)
@@ -2222,40 +2378,11 @@ hv_pte_set_mode(HV_PTE pte, unsigned int val)
*
* This field contains the upper bits of the CPA (client physical
* address) of the target page; the complete CPA is this field with
- * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it.
- *
- * For PTEs in a level-1 page table where the Page bit is set, the
- * CPA must be aligned modulo the large page size.
- */
-static __inline unsigned int
-hv_pte_get_pfn(const HV_PTE pte)
-{
- return pte.val >> HV_PTE_INDEX_PFN;
-}
-
-
-/** Set the page frame number into a PTE. See hv_pte_get_pfn. */
-static __inline HV_PTE
-hv_pte_set_pfn(HV_PTE pte, unsigned int val)
-{
- /*
- * Note that the use of "PTFN" in the next line is intentional; we
- * don't want any garbage lower bits left in that field.
- */
- pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN);
- pte.val |= (__hv64) val << HV_PTE_INDEX_PFN;
- return pte;
-}
-
-/** Get the page table frame number from the PTE.
+ * HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it.
*
- * This field contains the upper bits of the CPA (client physical
- * address) of the target page table; the complete CPA is this field with
- * with HV_PAGE_TABLE_ALIGN zero bits appended to it.
- *
- * For PTEs in a level-1 page table when the Page bit is not set, the
- * CPA must be aligned modulo the sticter of HV_PAGE_TABLE_ALIGN and
- * the level-2 page table size.
+ * For all PTEs in the lowest-level page table, and for all PTEs with
+ * the Page bit set in all page tables, the CPA must be aligned modulo
+ * the relevant page size.
*/
static __inline unsigned long
hv_pte_get_ptfn(const HV_PTE pte)
@@ -2263,7 +2390,6 @@ hv_pte_get_ptfn(const HV_PTE pte)
return pte.val >> HV_PTE_INDEX_PTFN;
}
-
/** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */
static __inline HV_PTE
hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
@@ -2273,6 +2399,20 @@ hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
return pte;
}
+/** Get the client physical address from the PTE. See hv_pte_set_ptfn. */
+static __inline HV_PhysAddr
+hv_pte_get_pa(const HV_PTE pte)
+{
+ return (__hv64) hv_pte_get_ptfn(pte) << HV_LOG2_PAGE_TABLE_ALIGN;
+}
+
+/** Set the client physical address into a PTE. See hv_pte_get_ptfn. */
+static __inline HV_PTE
+hv_pte_set_pa(HV_PTE pte, HV_PhysAddr pa)
+{
+ return hv_pte_set_ptfn(pte, pa >> HV_LOG2_PAGE_TABLE_ALIGN);
+}
+
/** Get the remote tile caching this page.
*
@@ -2308,28 +2448,20 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val)
#endif /* !__ASSEMBLER__ */
-/** Converts a client physical address to a pfn. */
-#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL)
-
-/** Converts a pfn to a client physical address. */
-#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL)
-
/** Converts a client physical address to a ptfn. */
#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN)
/** Converts a ptfn to a client physical address. */
#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN)
-/** Converts a ptfn to a pfn. */
-#define HV_PTFN_TO_PFN(p) \
- ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
-
-/** Converts a pfn to a ptfn. */
-#define HV_PFN_TO_PTFN(p) \
- ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
-
#if CHIP_VA_WIDTH() > 32
+/*
+ * Note that we currently do not allow customizing the page size
+ * of the L0 pages, but fix them at 4GB, so we do not use the
+ * "_HV_xxx" nomenclature for the L0 macros.
+ */
+
/** Log number of HV_PTE entries in L0 page table */
#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN)
@@ -2359,69 +2491,104 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val)
#endif /* CHIP_VA_WIDTH() > 32 */
/** Log number of HV_PTE entries in L1 page table */
-#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE)
+#define _HV_LOG2_L1_ENTRIES(log2_page_size_large) \
+ (HV_LOG2_L1_SPAN - log2_page_size_large)
/** Number of HV_PTE entries in L1 page table */
-#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES)
+#define _HV_L1_ENTRIES(log2_page_size_large) \
+ (1 << _HV_LOG2_L1_ENTRIES(log2_page_size_large))
/** Log size of L1 page table in bytes */
-#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES)
+#define _HV_LOG2_L1_SIZE(log2_page_size_large) \
+ (HV_LOG2_PTE_SIZE + _HV_LOG2_L1_ENTRIES(log2_page_size_large))
/** Size of L1 page table in bytes */
-#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE)
+#define _HV_L1_SIZE(log2_page_size_large) \
+ (1 << _HV_LOG2_L1_SIZE(log2_page_size_large))
/** Log number of HV_PTE entries in level-2 page table */
-#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)
+#define _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \
+ (log2_page_size_large - log2_page_size_small)
/** Number of HV_PTE entries in level-2 page table */
-#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES)
+#define _HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \
+ (1 << _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small))
/** Log size of level-2 page table in bytes */
-#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES)
+#define _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small) \
+ (HV_LOG2_PTE_SIZE + \
+ _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small))
/** Size of level-2 page table in bytes */
-#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE)
+#define _HV_L2_SIZE(log2_page_size_large, log2_page_size_small) \
+ (1 << _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small))
#ifdef __ASSEMBLER__
#if CHIP_VA_WIDTH() > 32
/** Index in L1 for a specific VA */
-#define HV_L1_INDEX(va) \
- (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+#define _HV_L1_INDEX(va, log2_page_size_large) \
+ (((va) >> log2_page_size_large) & (_HV_L1_ENTRIES(log2_page_size_large) - 1))
#else /* CHIP_VA_WIDTH() > 32 */
/** Index in L1 for a specific VA */
-#define HV_L1_INDEX(va) \
- (((va) >> HV_LOG2_PAGE_SIZE_LARGE))
+#define _HV_L1_INDEX(va, log2_page_size_large) \
+ (((va) >> log2_page_size_large))
#endif /* CHIP_VA_WIDTH() > 32 */
/** Index in level-2 page table for a specific VA */
-#define HV_L2_INDEX(va) \
- (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \
+ (((va) >> log2_page_size_small) & \
+ (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1))
#else /* __ASSEMBLER __ */
#if CHIP_VA_WIDTH() > 32
/** Index in L1 for a specific VA */
-#define HV_L1_INDEX(va) \
- (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1))
+#define _HV_L1_INDEX(va, log2_page_size_large) \
+ (((HV_VirtAddr)(va) >> log2_page_size_large) & \
+ (_HV_L1_ENTRIES(log2_page_size_large) - 1))
#else /* CHIP_VA_WIDTH() > 32 */
/** Index in L1 for a specific VA */
-#define HV_L1_INDEX(va) \
- (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE))
+#define _HV_L1_INDEX(va, log2_page_size_large) \
+ (((HV_VirtAddr)(va) >> log2_page_size_large))
#endif /* CHIP_VA_WIDTH() > 32 */
/** Index in level-2 page table for a specific VA */
-#define HV_L2_INDEX(va) \
- (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1))
+#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \
+ (((HV_VirtAddr)(va) >> log2_page_size_small) & \
+ (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1))
#endif /* __ASSEMBLER __ */
-#endif /* _TILE_HV_H */
+/** Position of the PFN field within the PTE (subset of the PTFN). */
+#define _HV_PTE_INDEX_PFN(log2_page_size) \
+ (HV_PTE_INDEX_PTFN + (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Length of the PFN field within the PTE (subset of the PTFN). */
+#define _HV_PTE_INDEX_PFN_BITS(log2_page_size) \
+ (HV_PTE_INDEX_PTFN_BITS - (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Converts a client physical address to a pfn. */
+#define _HV_CPA_TO_PFN(p, log2_page_size) ((p) >> log2_page_size)
+
+/** Converts a pfn to a client physical address. */
+#define _HV_PFN_TO_CPA(p, log2_page_size) \
+ (((HV_PhysAddr)(p)) << log2_page_size)
+
+/** Converts a ptfn to a pfn. */
+#define _HV_PTFN_TO_PFN(p, log2_page_size) \
+ ((p) >> (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
+
+/** Converts a pfn to a ptfn. */
+#define _HV_PFN_TO_PTFN(p, log2_page_size) \
+ ((p) << (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
+
+#endif /* _HV_HV_H */
diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h
new file mode 100644
index 00000000000..ddf1604482b
--- /dev/null
+++ b/arch/tile/include/hv/iorpc.h
@@ -0,0 +1,714 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+#ifndef _HV_IORPC_H_
+#define _HV_IORPC_H_
+
+/**
+ *
+ * Error codes and struct definitions for the IO RPC library.
+ *
+ * The hypervisor's IO RPC component provides a convenient way for
+ * driver authors to proxy system calls between user space, linux, and
+ * the hypervisor driver. The core of the system is a set of Python
+ * files that take ".idl" files as input and generates the following
+ * source code:
+ *
+ * - _rpc_call() routines for use in userspace IO libraries. These
+ * routines take an argument list specified in the .idl file, pack the
+ * arguments in to a buffer, and read or write that buffer via the
+ * Linux iorpc driver.
+ *
+ * - dispatch_read() and dispatch_write() routines that hypervisor
+ * drivers can use to implement most of their dev_pread() and
+ * dev_pwrite() methods. These routines decode the incoming parameter
+ * blob, permission check and translate parameters where appropriate,
+ * and then invoke a callback routine for whichever RPC call has
+ * arrived. The driver simply implements the set of callback
+ * routines.
+ *
+ * The IO RPC system also includes the Linux 'iorpc' driver, which
+ * proxies calls between the userspace library and the hypervisor
+ * driver. The Linux driver is almost entirely device agnostic; it
+ * watches for special flags indicating cases where a memory buffer
+ * address might need to be translated, etc. As a result, driver
+ * writers can avoid many of the problem cases related to registering
+ * hardware resources like memory pages or interrupts. However, the
+ * drivers must be careful to obey the conventions documented below in
+ * order to work properly with the generic Linux iorpc driver.
+ *
+ * @section iorpc_domains Service Domains
+ *
+ * All iorpc-based drivers must support a notion of service domains.
+ * A service domain is basically an application context - state
+ * indicating resources that are allocated to that particular app
+ * which it may access and (perhaps) other applications may not
+ * access. Drivers can support any number of service domains they
+ * choose. In some cases the design is limited by a number of service
+ * domains supported by the IO hardware; in other cases the service
+ * domains are a purely software concept and the driver chooses a
+ * maximum number of domains based on how much state memory it is
+ * willing to preallocate.
+ *
+ * For example, the mPIPE driver only supports as many service domains
+ * as are supported by the mPIPE hardware. This limitation is
+ * required because the hardware implements its own MMIO protection
+ * scheme to allow large MMIO mappings while still protecting small
+ * register ranges within the page that should only be accessed by the
+ * hypervisor.
+ *
+ * In contrast, drivers with no hardware service domain limitations
+ * (for instance the TRIO shim) can implement an arbitrary number of
+ * service domains. In these cases, each service domain is limited to
+ * a carefully restricted set of legal MMIO addresses if necessary to
+ * keep one application from corrupting another application's state.
+ *
+ * @section iorpc_conventions System Call Conventions
+ *
+ * The driver's open routine is responsible for allocating a new
+ * service domain for each hv_dev_open() call. By convention, the
+ * return value from open() should be the service domain number on
+ * success, or GXIO_ERR_NO_SVC_DOM if no more service domains are
+ * available.
+ *
+ * The implementations of hv_dev_pread() and hv_dev_pwrite() are
+ * responsible for validating the devhdl value passed up by the
+ * client. Since the device handle returned by hv_dev_open() should
+ * embed the positive service domain number, drivers should make sure
+ * that DRV_HDL2BITS(devhdl) is a legal service domain. If the client
+ * passes an illegal service domain number, the routine should return
+ * GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been
+ * validated, the driver can copy to/from the client buffer and call
+ * the dispatch_read() or dispatch_write() methods created by the RPC
+ * generator.
+ *
+ * The hv_dev_close() implementation should reset all service domain
+ * state and put the service domain back on a free list for
+ * reallocation by a future application. In most cases, this will
+ * require executing a hardware reset or drain flow and denying any
+ * MMIO regions that were created for the service domain.
+ *
+ * @section iorpc_data Special Data Types
+ *
+ * The .idl file syntax allows the creation of syscalls with special
+ * parameters that require permission checks or translations as part
+ * of the system call path. Because of limitations in the code
+ * generator, APIs are generally limited to just one of these special
+ * parameters per system call, and they are sometimes required to be
+ * the first or last parameter to the call. Special parameters
+ * include:
+ *
+ * @subsection iorpc_mem_buffer MEM_BUFFER
+ *
+ * The MEM_BUFFER() datatype allows user space to "register" memory
+ * buffers with a device. Registering memory accomplishes two tasks:
+ * Linux keeps track of all buffers that might be modified by a
+ * hardware device, and the hardware device drivers bind registered
+ * buffers to particular hardware resources like ingress NotifRings.
+ * The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB,
+ * ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have
+ * certain alignment or that the user should be able to pass a "memory
+ * flags" word specifying attributes like nt_hint or IO cache pinning.
+ * The parser will accept multiple MEM_BUFFER() flags.
+ *
+ * Implementations must obey the following conventions when
+ * registering memory buffers via the iorpc flow. These rules are a
+ * result of the Linux driver implementation, which needs to keep
+ * track of how many times a particular page has been registered with
+ * the hardware so that it can release the page when all those
+ * registrations are cleared.
+ *
+ * - Memory registrations that refer to a resource which has already
+ * been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an
+ * error to register memory twice without resetting (i.e. closing) the
+ * resource in between. This convention keeps the Linux driver from
+ * having to track which particular devices a page is bound to.
+ *
+ * - At present, a memory registration is only cleared when the
+ * service domain is reset. In this case, the Linux driver simply
+ * closes the HV device file handle and then decrements the reference
+ * counts of all pages that were previously registered with the
+ * device.
+ *
+ * - In the future, we may add a mechanism for unregistering memory.
+ * One possible implementation would require that the user specify
+ * which buffer is currently registered. The HV would then verify
+ * that that page was actually the one currently mapped and return
+ * success or failure to Linux, which would then only decrement the
+ * page reference count if the addresses were mapped. Another scheme
+ * might allow Linux to pass a token to the HV to be returned when the
+ * resource is unmapped.
+ *
+ * @subsection iorpc_interrupt INTERRUPT
+ *
+ * The INTERRUPT .idl datatype allows the client to bind hardware
+ * interrupts to a particular combination of IPI parameters - CPU, IPI
+ * PL, and event bit number. This data is passed via a special
+ * datatype so that the Linux driver can validate the CPU and PL and
+ * the HV generic iorpc code can translate client CPUs to real CPUs.
+ *
+ * @subsection iorpc_pollfd_setup POLLFD_SETUP
+ *
+ * The POLLFD_SETUP .idl datatype allows the client to set up hardware
+ * interrupt bindings which are received by Linux but which are made
+ * visible to user processes as state transitions on a file descriptor;
+ * this allows user processes to use Linux primitives, such as poll(), to
+ * await particular hardware events. This data is passed via a special
+ * datatype so that the Linux driver may recognize the pollable file
+ * descriptor and translate it to a set of interrupt target information,
+ * and so that the HV generic iorpc code can translate client CPUs to real
+ * CPUs.
+ *
+ * @subsection iorpc_pollfd POLLFD
+ *
+ * The POLLFD .idl datatype allows manipulation of hardware interrupt
+ * bindings set up via the POLLFD_SETUP datatype; common operations are
+ * resetting the state of the requested interrupt events, and unbinding any
+ * bound interrupts. This data is passed via a special datatype so that
+ * the Linux driver may recognize the pollable file descriptor and
+ * translate it to an interrupt identifier previously supplied by the
+ * hypervisor as the result of an earlier pollfd_setup operation.
+ *
+ * @subsection iorpc_blob BLOB
+ *
+ * The BLOB .idl datatype allows the client to write an arbitrary
+ * length string of bytes up to the hypervisor driver. This can be
+ * useful for passing up large, arbitrarily structured data like
+ * classifier programs. The iorpc stack takes care of validating the
+ * buffer VA and CPA as the data passes up to the hypervisor. Unlike
+ * MEM_BUFFER(), the buffer is not registered - Linux does not bump
+ * page refcounts and the HV driver should not reuse the buffer once
+ * the system call is complete.
+ *
+ * @section iorpc_translation Translating User Space Calls
+ *
+ * The ::iorpc_offset structure describes the formatting of the offset
+ * that is passed to pread() or pwrite() as part of the generated RPC code.
+ * When the user calls up to Linux, the rpc code fills in all the fields of
+ * the offset, including a 16-bit opcode, a 16 bit format indicator, and 32
+ * bits of user-specified "sub-offset". The opcode indicates which syscall
+ * is being requested. The format indicates whether there is a "prefix
+ * struct" at the start of the memory buffer passed to pwrite(), and if so
+ * what data is in that prefix struct. These prefix structs are used to
+ * implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange
+ * to put data that needs translation and permission checks at the start of
+ * the buffer so that the Linux driver and generic portions of the HV iorpc
+ * code can easily access the data. The 32 bits of user-specified
+ * "sub-offset" are most useful for pread() calls where the user needs to
+ * also pass in a few bits indicating which register to read, etc.
+ *
+ * The Linux iorpc driver watches for system calls that contain prefix
+ * structs so that it can translate parameters and bump reference
+ * counts as appropriate. It does not (currently) have any knowledge
+ * of the per-device opcodes - it doesn't care what operation you're
+ * doing to mPIPE, so long as it can do all the generic book-keeping.
+ * The hv/iorpc.h header file defines all of the generic encoding bits
+ * needed to translate iorpc calls without knowing which particular
+ * opcode is being issued.
+ *
+ * @section iorpc_globals Global iorpc Calls
+ *
+ * Implementing mmap() required adding some special iorpc syscalls
+ * that are only called by the Linux driver, never by userspace.
+ * These include get_mmio_base() and check_mmio_offset(). These
+ * routines are described in globals.idl and must be included in every
+ * iorpc driver. By providing these routines in every driver, Linux's
+ * mmap implementation can easily get the PTE bits it needs and
+ * validate the PA offset without needing to know the per-device
+ * opcodes to perform those tasks.
+ *
+ * @section iorpc_kernel Supporting gxio APIs in the Kernel
+ *
+ * The iorpc code generator also supports generation of kernel code
+ * implementing the gxio APIs. This capability is currently used by
+ * the mPIPE network driver, and will likely be used by the TRIO root
+ * complex and endpoint drivers and perhaps an in-kernel crypto
+ * driver. Each driver that wants to instantiate iorpc calls in the
+ * kernel needs to generate a kernel version of the generate rpc code
+ * and (probably) copy any related gxio source files into the kernel.
+ * The mPIPE driver provides a good example of this pattern.
+ */
+
+#ifdef __KERNEL__
+#include <linux/stddef.h>
+#else
+#include <stddef.h>
+#endif
+
+#if defined(__HV__)
+#include <hv/hypervisor.h>
+#elif defined(__KERNEL__)
+#include <hv/hypervisor.h>
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+
+/** Code indicating translation services required within the RPC path.
+ * These indicate whether there is a translatable struct at the start
+ * of the RPC buffer and what information that struct contains.
+ */
+enum iorpc_format_e
+{
+ /** No translation required, no prefix struct. */
+ IORPC_FORMAT_NONE,
+
+ /** No translation required, no prefix struct, no access to this
+ * operation from user space. */
+ IORPC_FORMAT_NONE_NOUSER,
+
+ /** Prefix struct contains user VA and size. */
+ IORPC_FORMAT_USER_MEM,
+
+ /** Prefix struct contains CPA, size, and homing bits. */
+ IORPC_FORMAT_KERNEL_MEM,
+
+ /** Prefix struct contains interrupt. */
+ IORPC_FORMAT_KERNEL_INTERRUPT,
+
+ /** Prefix struct contains user-level interrupt. */
+ IORPC_FORMAT_USER_INTERRUPT,
+
+ /** Prefix struct contains pollfd_setup (interrupt information). */
+ IORPC_FORMAT_KERNEL_POLLFD_SETUP,
+
+ /** Prefix struct contains user-level pollfd_setup (file descriptor). */
+ IORPC_FORMAT_USER_POLLFD_SETUP,
+
+ /** Prefix struct contains pollfd (interrupt cookie). */
+ IORPC_FORMAT_KERNEL_POLLFD,
+
+ /** Prefix struct contains user-level pollfd (file descriptor). */
+ IORPC_FORMAT_USER_POLLFD,
+};
+
+
+/** Generate an opcode given format and code. */
+#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE))
+
+/** The offset passed through the read() and write() system calls
+ combines an opcode with 32 bits of user-specified offset. */
+union iorpc_offset
+{
+#ifndef __BIG_ENDIAN__
+ uint64_t offset; /**< All bits. */
+
+ struct
+ {
+ uint16_t code; /**< RPC code. */
+ uint16_t format; /**< iorpc_format_e */
+ uint32_t sub_offset; /**< caller-specified offset. */
+ };
+
+ uint32_t opcode; /**< Opcode combines code & format. */
+#else
+ uint64_t offset; /**< All bits. */
+
+ struct
+ {
+ uint32_t sub_offset; /**< caller-specified offset. */
+ uint16_t format; /**< iorpc_format_e */
+ uint16_t code; /**< RPC code. */
+ };
+
+ struct
+ {
+ uint32_t padding;
+ uint32_t opcode; /**< Opcode combines code & format. */
+ };
+#endif
+};
+
+
+/** Homing and cache hinting bits that can be used by IO devices. */
+struct iorpc_mem_attr
+{
+ unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). */
+ unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */
+ unsigned int hfh:1; /**< Uses hash-for-home. */
+ unsigned int nt_hint:1; /**< Non-temporal hint. */
+ unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */
+};
+
+/** Set the nt_hint bit. */
+#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0)
+
+/** Set the IO pin bit. */
+#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1)
+
+
+/** A structure used to describe memory registration. Different
+ protection levels describe memory differently, so this union
+ contains all the different possible descriptions. As a request
+ moves up the call chain, each layer translates from one
+ description format to the next. In particular, the Linux iorpc
+ driver translates user VAs into CPAs and homing parameters. */
+union iorpc_mem_buffer
+{
+ struct
+ {
+ uint64_t va; /**< User virtual address. */
+ uint64_t size; /**< Buffer size. */
+ unsigned int flags; /**< nt_hint, IO pin. */
+ }
+ user; /**< Buffer as described by user apps. */
+
+ struct
+ {
+ unsigned long long cpa; /**< Client physical address. */
+#if defined(__KERNEL__) || defined(__HV__)
+ size_t size; /**< Buffer size. */
+ HV_PTE pte; /**< PTE describing memory homing. */
+#else
+ uint64_t size;
+ uint64_t pte;
+#endif
+ unsigned int flags; /**< nt_hint, IO pin. */
+ }
+ kernel; /**< Buffer as described by kernel. */
+
+ struct
+ {
+ unsigned long long pa; /**< Physical address. */
+ size_t size; /**< Buffer size. */
+ struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */
+ }
+ hv; /**< Buffer parameters for HV driver. */
+};
+
+
+/** A structure used to describe interrupts. The format differs slightly
+ * for user and kernel interrupts. As with the mem_buffer_t, translation
+ * between the formats is done at each level. */
+union iorpc_interrupt
+{
+ struct
+ {
+ int cpu; /**< CPU. */
+ int event; /**< evt_num */
+ }
+ user; /**< Interrupt as described by user applications. */
+
+ struct
+ {
+ int x; /**< X coord. */
+ int y; /**< Y coord. */
+ int ipi; /**< int_num */
+ int event; /**< evt_num */
+ }
+ kernel; /**< Interrupt as described by the kernel. */
+
+};
+
+
+/** A structure used to describe interrupts used with poll(). The format
+ * differs significantly for requests from user to kernel, and kernel to
+ * hypervisor. As with the mem_buffer_t, translation between the formats
+ * is done at each level. */
+union iorpc_pollfd_setup
+{
+ struct
+ {
+ int fd; /**< Pollable file descriptor. */
+ }
+ user; /**< pollfd_setup as described by user applications. */
+
+ struct
+ {
+ int x; /**< X coord. */
+ int y; /**< Y coord. */
+ int ipi; /**< int_num */
+ int event; /**< evt_num */
+ }
+ kernel; /**< pollfd_setup as described by the kernel. */
+
+};
+
+
+/** A structure used to describe previously set up interrupts used with
+ * poll(). The format differs significantly for requests from user to
+ * kernel, and kernel to hypervisor. As with the mem_buffer_t, translation
+ * between the formats is done at each level. */
+union iorpc_pollfd
+{
+ struct
+ {
+ int fd; /**< Pollable file descriptor. */
+ }
+ user; /**< pollfd as described by user applications. */
+
+ struct
+ {
+ int cookie; /**< hv cookie returned by the pollfd_setup operation. */
+ }
+ kernel; /**< pollfd as described by the kernel. */
+
+};
+
+
+/** The various iorpc devices use error codes from -1100 to -1299.
+ *
+ * This range is distinct from netio (-700 to -799), the hypervisor
+ * (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099),
+ * gxcr (-1300 to -1399) and gxpci (-1400 to -1499).
+ */
+enum gxio_err_e {
+
+ /** Largest iorpc error number. */
+ GXIO_ERR_MAX = -1101,
+
+
+ /********************************************************/
+ /* Generic Error Codes */
+ /********************************************************/
+
+ /** Bad RPC opcode - possible version incompatibility. */
+ GXIO_ERR_OPCODE = -1101,
+
+ /** Invalid parameter. */
+ GXIO_ERR_INVAL = -1102,
+
+ /** Memory buffer did not meet alignment requirements. */
+ GXIO_ERR_ALIGNMENT = -1103,
+
+ /** Memory buffers must be coherent and cacheable. */
+ GXIO_ERR_COHERENCE = -1104,
+
+ /** Resource already initialized. */
+ GXIO_ERR_ALREADY_INIT = -1105,
+
+ /** No service domains available. */
+ GXIO_ERR_NO_SVC_DOM = -1106,
+
+ /** Illegal service domain number. */
+ GXIO_ERR_INVAL_SVC_DOM = -1107,
+
+ /** Illegal MMIO address. */
+ GXIO_ERR_MMIO_ADDRESS = -1108,
+
+ /** Illegal interrupt binding. */
+ GXIO_ERR_INTERRUPT = -1109,
+
+ /** Unreasonable client memory. */
+ GXIO_ERR_CLIENT_MEMORY = -1110,
+
+ /** No more IOTLB entries. */
+ GXIO_ERR_IOTLB_ENTRY = -1111,
+
+ /** Invalid memory size. */
+ GXIO_ERR_INVAL_MEMORY_SIZE = -1112,
+
+ /** Unsupported operation. */
+ GXIO_ERR_UNSUPPORTED_OP = -1113,
+
+ /** Insufficient DMA credits. */
+ GXIO_ERR_DMA_CREDITS = -1114,
+
+ /** Operation timed out. */
+ GXIO_ERR_TIMEOUT = -1115,
+
+ /** No such device or object. */
+ GXIO_ERR_NO_DEVICE = -1116,
+
+ /** Device or resource busy. */
+ GXIO_ERR_BUSY = -1117,
+
+ /** I/O error. */
+ GXIO_ERR_IO = -1118,
+
+ /** Permissions error. */
+ GXIO_ERR_PERM = -1119,
+
+
+
+ /********************************************************/
+ /* Test Device Error Codes */
+ /********************************************************/
+
+ /** Illegal register number. */
+ GXIO_TEST_ERR_REG_NUMBER = -1120,
+
+ /** Illegal buffer slot. */
+ GXIO_TEST_ERR_BUFFER_SLOT = -1121,
+
+
+ /********************************************************/
+ /* MPIPE Error Codes */
+ /********************************************************/
+
+
+ /** Invalid buffer size. */
+ GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131,
+
+ /** Cannot allocate buffer stack. */
+ GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140,
+
+ /** Invalid buffer stack number. */
+ GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141,
+
+ /** Cannot allocate NotifRing. */
+ GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142,
+
+ /** Invalid NotifRing number. */
+ GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143,
+
+ /** Cannot allocate NotifGroup. */
+ GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144,
+
+ /** Invalid NotifGroup number. */
+ GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145,
+
+ /** Cannot allocate bucket. */
+ GXIO_MPIPE_ERR_NO_BUCKET = -1146,
+
+ /** Invalid bucket number. */
+ GXIO_MPIPE_ERR_BAD_BUCKET = -1147,
+
+ /** Cannot allocate eDMA ring. */
+ GXIO_MPIPE_ERR_NO_EDMA_RING = -1148,
+
+ /** Invalid eDMA ring number. */
+ GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149,
+
+ /** Invalid channel number. */
+ GXIO_MPIPE_ERR_BAD_CHANNEL = -1150,
+
+ /** Bad configuration. */
+ GXIO_MPIPE_ERR_BAD_CONFIG = -1151,
+
+ /** Empty iqueue. */
+ GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152,
+
+ /** Empty rules. */
+ GXIO_MPIPE_ERR_RULES_EMPTY = -1160,
+
+ /** Full rules. */
+ GXIO_MPIPE_ERR_RULES_FULL = -1161,
+
+ /** Corrupt rules. */
+ GXIO_MPIPE_ERR_RULES_CORRUPT = -1162,
+
+ /** Invalid rules. */
+ GXIO_MPIPE_ERR_RULES_INVALID = -1163,
+
+ /** Classifier is too big. */
+ GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170,
+
+ /** Classifier is too complex. */
+ GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171,
+
+ /** Classifier has bad header. */
+ GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172,
+
+ /** Classifier has bad contents. */
+ GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173,
+
+ /** Classifier encountered invalid symbol. */
+ GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174,
+
+ /** Classifier encountered invalid bounds. */
+ GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175,
+
+ /** Classifier encountered invalid relocation. */
+ GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176,
+
+ /** Classifier encountered undefined symbol. */
+ GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177,
+
+
+ /********************************************************/
+ /* TRIO Error Codes */
+ /********************************************************/
+
+ /** Cannot allocate memory map region. */
+ GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180,
+
+ /** Invalid memory map region number. */
+ GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181,
+
+ /** Cannot allocate scatter queue. */
+ GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182,
+
+ /** Invalid scatter queue number. */
+ GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183,
+
+ /** Cannot allocate push DMA ring. */
+ GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184,
+
+ /** Invalid push DMA ring index. */
+ GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185,
+
+ /** Cannot allocate pull DMA ring. */
+ GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186,
+
+ /** Invalid pull DMA ring index. */
+ GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187,
+
+ /** Cannot allocate PIO region. */
+ GXIO_TRIO_ERR_NO_PIO = -1188,
+
+ /** Invalid PIO region index. */
+ GXIO_TRIO_ERR_BAD_PIO = -1189,
+
+ /** Cannot allocate ASID. */
+ GXIO_TRIO_ERR_NO_ASID = -1190,
+
+ /** Invalid ASID. */
+ GXIO_TRIO_ERR_BAD_ASID = -1191,
+
+
+ /********************************************************/
+ /* MICA Error Codes */
+ /********************************************************/
+
+ /** No such accelerator type. */
+ GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220,
+
+ /** Cannot allocate context. */
+ GXIO_MICA_ERR_NO_CONTEXT = -1221,
+
+ /** PKA command queue is full, can't add another command. */
+ GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222,
+
+ /** PKA result queue is empty, can't get a result from the queue. */
+ GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223,
+
+ /********************************************************/
+ /* GPIO Error Codes */
+ /********************************************************/
+
+ /** Pin not available. Either the physical pin does not exist, or
+ * it is reserved by the hypervisor for system usage. */
+ GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240,
+
+ /** Pin busy. The pin exists, and is available for use via GXIO, but
+ * it has been attached by some other process or driver. */
+ GXIO_GPIO_ERR_PIN_BUSY = -1241,
+
+ /** Cannot access unattached pin. One or more of the pins being
+ * manipulated by this call are not attached to the requesting
+ * context. */
+ GXIO_GPIO_ERR_PIN_UNATTACHED = -1242,
+
+ /** Invalid I/O mode for pin. The wiring of the pin in the system
+ * is such that the I/O mode or electrical control parameters
+ * requested could cause damage. */
+ GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243,
+
+ /** Smallest iorpc error number. */
+ GXIO_ERR_MIN = -1299
+};
+
+
+#endif /* !_HV_IORPC_H_ */
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
new file mode 100644
index 00000000000..97dfbecec6b
--- /dev/null
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -0,0 +1,17 @@
+# UAPI Header export list
+header-y += abi.h
+header-y += chip.h
+header-y += chip_tilegx.h
+header-y += chip_tilepro.h
+header-y += icache.h
+header-y += interrupts.h
+header-y += interrupts_32.h
+header-y += interrupts_64.h
+header-y += opcode.h
+header-y += opcode_tilegx.h
+header-y += opcode_tilepro.h
+header-y += sim.h
+header-y += sim_def.h
+header-y += spr_def.h
+header-y += spr_def_32.h
+header-y += spr_def_64.h
diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/uapi/arch/abi.h
index c55a3d43264..c55a3d43264 100644
--- a/arch/tile/include/arch/abi.h
+++ b/arch/tile/include/uapi/arch/abi.h
diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/uapi/arch/chip.h
index 926d3db0e91..4c91f90b936 100644
--- a/arch/tile/include/arch/chip.h
+++ b/arch/tile/include/uapi/arch/chip.h
@@ -12,9 +12,7 @@
* more details.
*/
-#if __tile_chip__ == 0
-#include <arch/chip_tile64.h>
-#elif __tile_chip__ == 1
+#if __tile_chip__ == 1
#include <arch/chip_tilepro.h>
#elif defined(__tilegx__)
#include <arch/chip_tilegx.h>
diff --git a/arch/tile/include/arch/chip_tilegx.h b/arch/tile/include/uapi/arch/chip_tilegx.h
index ea8e4f2c948..ea8e4f2c948 100644
--- a/arch/tile/include/arch/chip_tilegx.h
+++ b/arch/tile/include/uapi/arch/chip_tilegx.h
diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/uapi/arch/chip_tilepro.h
index 70017699a74..70017699a74 100644
--- a/arch/tile/include/arch/chip_tilepro.h
+++ b/arch/tile/include/uapi/arch/chip_tilepro.h
diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/uapi/arch/icache.h
index 762eafa8a11..762eafa8a11 100644
--- a/arch/tile/include/arch/icache.h
+++ b/arch/tile/include/uapi/arch/icache.h
diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/uapi/arch/interrupts.h
index 20f8f07d2de..20f8f07d2de 100644
--- a/arch/tile/include/arch/interrupts.h
+++ b/arch/tile/include/uapi/arch/interrupts.h
diff --git a/arch/tile/include/uapi/arch/interrupts_32.h b/arch/tile/include/uapi/arch/interrupts_32.h
new file mode 100644
index 00000000000..2efe3f68b2d
--- /dev/null
+++ b/arch/tile/include/uapi/arch/interrupts_32.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+#ifndef __KERNEL__
+/** Mask for an interrupt. */
+/* Note: must handle breaking interrupts into high and low words manually. */
+#define INT_MASK_LO(intno) (1 << (intno))
+#define INT_MASK_HI(intno) (1 << ((intno) - 32))
+
+#ifndef __ASSEMBLER__
+#define INT_MASK(intno) (1ULL << (intno))
+#endif
+#endif
+
+
+/** Where a given interrupt executes */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt. */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts. */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt. */
+#define INT_BREAKPOINT (63)
+
+#define INT_ITLB_MISS 0
+#define INT_MEM_ERROR 1
+#define INT_ILL 2
+#define INT_GPV 3
+#define INT_SN_ACCESS 4
+#define INT_IDN_ACCESS 5
+#define INT_UDN_ACCESS 6
+#define INT_IDN_REFILL 7
+#define INT_UDN_REFILL 8
+#define INT_IDN_COMPLETE 9
+#define INT_UDN_COMPLETE 10
+#define INT_SWINT_3 11
+#define INT_SWINT_2 12
+#define INT_SWINT_1 13
+#define INT_SWINT_0 14
+#define INT_UNALIGN_DATA 15
+#define INT_DTLB_MISS 16
+#define INT_DTLB_ACCESS 17
+#define INT_DMATLB_MISS 18
+#define INT_DMATLB_ACCESS 19
+#define INT_SNITLB_MISS 20
+#define INT_SN_NOTIFY 21
+#define INT_SN_FIREWALL 22
+#define INT_IDN_FIREWALL 23
+#define INT_UDN_FIREWALL 24
+#define INT_TILE_TIMER 25
+#define INT_IDN_TIMER 26
+#define INT_UDN_TIMER 27
+#define INT_DMA_NOTIFY 28
+#define INT_IDN_CA 29
+#define INT_UDN_CA 30
+#define INT_IDN_AVAIL 31
+#define INT_UDN_AVAIL 32
+#define INT_PERF_COUNT 33
+#define INT_INTCTRL_3 34
+#define INT_INTCTRL_2 35
+#define INT_INTCTRL_1 36
+#define INT_INTCTRL_0 37
+#define INT_BOOT_ACCESS 38
+#define INT_WORLD_ACCESS 39
+#define INT_I_ASID 40
+#define INT_D_ASID 41
+#define INT_DMA_ASID 42
+#define INT_SNI_ASID 43
+#define INT_DMA_CPL 44
+#define INT_SN_CPL 45
+#define INT_DOUBLE_FAULT 46
+#define INT_SN_STATIC_ACCESS 47
+#define INT_AUX_PERF_COUNT 48
+
+#define NUM_INTERRUPTS 49
+
+#ifndef __ASSEMBLER__
+#define QUEUED_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_DMATLB_MISS) | \
+ (1ULL << INT_DMATLB_ACCESS) | \
+ (1ULL << INT_SNITLB_MISS) | \
+ (1ULL << INT_SN_NOTIFY) | \
+ (1ULL << INT_SN_FIREWALL) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_DMA_NOTIFY) | \
+ (1ULL << INT_IDN_CA) | \
+ (1ULL << INT_UDN_CA) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DMA_ASID) | \
+ (1ULL << INT_SNI_ASID) | \
+ (1ULL << INT_DMA_CPL) | \
+ (1ULL << INT_SN_CPL) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ 0)
+#define NONQUEUED_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_SN_ACCESS) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_IDN_REFILL) | \
+ (1ULL << INT_UDN_REFILL) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_SN_STATIC_ACCESS) | \
+ 0)
+#define CRITICAL_MASKED_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_DMATLB_MISS) | \
+ (1ULL << INT_DMATLB_ACCESS) | \
+ (1ULL << INT_SNITLB_MISS) | \
+ (1ULL << INT_SN_NOTIFY) | \
+ (1ULL << INT_SN_FIREWALL) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_DMA_NOTIFY) | \
+ (1ULL << INT_IDN_CA) | \
+ (1ULL << INT_UDN_CA) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ 0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_SN_ACCESS) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_IDN_REFILL) | \
+ (1ULL << INT_UDN_REFILL) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DMA_ASID) | \
+ (1ULL << INT_SNI_ASID) | \
+ (1ULL << INT_DMA_CPL) | \
+ (1ULL << INT_SN_CPL) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ (1ULL << INT_SN_STATIC_ACCESS) | \
+ 0)
+#define MASKABLE_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_IDN_REFILL) | \
+ (1ULL << INT_UDN_REFILL) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_DMATLB_MISS) | \
+ (1ULL << INT_DMATLB_ACCESS) | \
+ (1ULL << INT_SNITLB_MISS) | \
+ (1ULL << INT_SN_NOTIFY) | \
+ (1ULL << INT_SN_FIREWALL) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_DMA_NOTIFY) | \
+ (1ULL << INT_IDN_CA) | \
+ (1ULL << INT_UDN_CA) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ 0)
+#define UNMASKABLE_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_SN_ACCESS) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DMA_ASID) | \
+ (1ULL << INT_SNI_ASID) | \
+ (1ULL << INT_DMA_CPL) | \
+ (1ULL << INT_SN_CPL) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ (1ULL << INT_SN_STATIC_ACCESS) | \
+ 0)
+#define SYNC_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_SN_ACCESS) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_IDN_REFILL) | \
+ (1ULL << INT_UDN_REFILL) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_SN_STATIC_ACCESS) | \
+ 0)
+#define NON_SYNC_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_DMATLB_MISS) | \
+ (1ULL << INT_DMATLB_ACCESS) | \
+ (1ULL << INT_SNITLB_MISS) | \
+ (1ULL << INT_SN_NOTIFY) | \
+ (1ULL << INT_SN_FIREWALL) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_DMA_NOTIFY) | \
+ (1ULL << INT_IDN_CA) | \
+ (1ULL << INT_UDN_CA) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DMA_ASID) | \
+ (1ULL << INT_SNI_ASID) | \
+ (1ULL << INT_DMA_CPL) | \
+ (1ULL << INT_SN_CPL) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ 0)
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/uapi/arch/interrupts_64.h b/arch/tile/include/uapi/arch/interrupts_64.h
new file mode 100644
index 00000000000..13c9f918234
--- /dev/null
+++ b/arch/tile/include/uapi/arch/interrupts_64.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __ARCH_INTERRUPTS_H__
+#define __ARCH_INTERRUPTS_H__
+
+#ifndef __KERNEL__
+/** Mask for an interrupt. */
+#ifdef __ASSEMBLER__
+/* Note: must handle breaking interrupts into high and low words manually. */
+#define INT_MASK(intno) (1 << (intno))
+#else
+#define INT_MASK(intno) (1ULL << (intno))
+#endif
+#endif
+
+
+/** Where a given interrupt executes */
+#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8))
+
+/** Where to store a vector for a given interrupt. */
+#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0)
+
+/** The base address of user-level interrupts. */
+#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0)
+
+
+/** Additional synthetic interrupt. */
+#define INT_BREAKPOINT (63)
+
+#define INT_MEM_ERROR 0
+#define INT_SINGLE_STEP_3 1
+#define INT_SINGLE_STEP_2 2
+#define INT_SINGLE_STEP_1 3
+#define INT_SINGLE_STEP_0 4
+#define INT_IDN_COMPLETE 5
+#define INT_UDN_COMPLETE 6
+#define INT_ITLB_MISS 7
+#define INT_ILL 8
+#define INT_GPV 9
+#define INT_IDN_ACCESS 10
+#define INT_UDN_ACCESS 11
+#define INT_SWINT_3 12
+#define INT_SWINT_2 13
+#define INT_SWINT_1 14
+#define INT_SWINT_0 15
+#define INT_ILL_TRANS 16
+#define INT_UNALIGN_DATA 17
+#define INT_DTLB_MISS 18
+#define INT_DTLB_ACCESS 19
+#define INT_IDN_FIREWALL 20
+#define INT_UDN_FIREWALL 21
+#define INT_TILE_TIMER 22
+#define INT_AUX_TILE_TIMER 23
+#define INT_IDN_TIMER 24
+#define INT_UDN_TIMER 25
+#define INT_IDN_AVAIL 26
+#define INT_UDN_AVAIL 27
+#define INT_IPI_3 28
+#define INT_IPI_2 29
+#define INT_IPI_1 30
+#define INT_IPI_0 31
+#define INT_PERF_COUNT 32
+#define INT_AUX_PERF_COUNT 33
+#define INT_INTCTRL_3 34
+#define INT_INTCTRL_2 35
+#define INT_INTCTRL_1 36
+#define INT_INTCTRL_0 37
+#define INT_BOOT_ACCESS 38
+#define INT_WORLD_ACCESS 39
+#define INT_I_ASID 40
+#define INT_D_ASID 41
+#define INT_DOUBLE_FAULT 42
+
+#define NUM_INTERRUPTS 43
+
+#ifndef __ASSEMBLER__
+#define QUEUED_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_AUX_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_IPI_3) | \
+ (1ULL << INT_IPI_2) | \
+ (1ULL << INT_IPI_1) | \
+ (1ULL << INT_IPI_0) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ 0)
+#define NONQUEUED_INTERRUPTS ( \
+ (1ULL << INT_SINGLE_STEP_3) | \
+ (1ULL << INT_SINGLE_STEP_2) | \
+ (1ULL << INT_SINGLE_STEP_1) | \
+ (1ULL << INT_SINGLE_STEP_0) | \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_ILL_TRANS) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ 0)
+#define CRITICAL_MASKED_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_SINGLE_STEP_3) | \
+ (1ULL << INT_SINGLE_STEP_2) | \
+ (1ULL << INT_SINGLE_STEP_1) | \
+ (1ULL << INT_SINGLE_STEP_0) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_AUX_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_IPI_3) | \
+ (1ULL << INT_IPI_2) | \
+ (1ULL << INT_IPI_1) | \
+ (1ULL << INT_IPI_0) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ 0)
+#define CRITICAL_UNMASKED_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_ILL_TRANS) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ 0)
+#define MASKABLE_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_SINGLE_STEP_3) | \
+ (1ULL << INT_SINGLE_STEP_2) | \
+ (1ULL << INT_SINGLE_STEP_1) | \
+ (1ULL << INT_SINGLE_STEP_0) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_AUX_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_IPI_3) | \
+ (1ULL << INT_IPI_2) | \
+ (1ULL << INT_IPI_1) | \
+ (1ULL << INT_IPI_0) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ 0)
+#define UNMASKABLE_INTERRUPTS ( \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_ILL_TRANS) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ 0)
+#define SYNC_INTERRUPTS ( \
+ (1ULL << INT_SINGLE_STEP_3) | \
+ (1ULL << INT_SINGLE_STEP_2) | \
+ (1ULL << INT_SINGLE_STEP_1) | \
+ (1ULL << INT_SINGLE_STEP_0) | \
+ (1ULL << INT_IDN_COMPLETE) | \
+ (1ULL << INT_UDN_COMPLETE) | \
+ (1ULL << INT_ITLB_MISS) | \
+ (1ULL << INT_ILL) | \
+ (1ULL << INT_GPV) | \
+ (1ULL << INT_IDN_ACCESS) | \
+ (1ULL << INT_UDN_ACCESS) | \
+ (1ULL << INT_SWINT_3) | \
+ (1ULL << INT_SWINT_2) | \
+ (1ULL << INT_SWINT_1) | \
+ (1ULL << INT_SWINT_0) | \
+ (1ULL << INT_ILL_TRANS) | \
+ (1ULL << INT_UNALIGN_DATA) | \
+ (1ULL << INT_DTLB_MISS) | \
+ (1ULL << INT_DTLB_ACCESS) | \
+ 0)
+#define NON_SYNC_INTERRUPTS ( \
+ (1ULL << INT_MEM_ERROR) | \
+ (1ULL << INT_IDN_FIREWALL) | \
+ (1ULL << INT_UDN_FIREWALL) | \
+ (1ULL << INT_TILE_TIMER) | \
+ (1ULL << INT_AUX_TILE_TIMER) | \
+ (1ULL << INT_IDN_TIMER) | \
+ (1ULL << INT_UDN_TIMER) | \
+ (1ULL << INT_IDN_AVAIL) | \
+ (1ULL << INT_UDN_AVAIL) | \
+ (1ULL << INT_IPI_3) | \
+ (1ULL << INT_IPI_2) | \
+ (1ULL << INT_IPI_1) | \
+ (1ULL << INT_IPI_0) | \
+ (1ULL << INT_PERF_COUNT) | \
+ (1ULL << INT_AUX_PERF_COUNT) | \
+ (1ULL << INT_INTCTRL_3) | \
+ (1ULL << INT_INTCTRL_2) | \
+ (1ULL << INT_INTCTRL_1) | \
+ (1ULL << INT_INTCTRL_0) | \
+ (1ULL << INT_BOOT_ACCESS) | \
+ (1ULL << INT_WORLD_ACCESS) | \
+ (1ULL << INT_I_ASID) | \
+ (1ULL << INT_D_ASID) | \
+ (1ULL << INT_DOUBLE_FAULT) | \
+ 0)
+#endif /* !__ASSEMBLER__ */
+#endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/opcode.h b/arch/tile/include/uapi/arch/opcode.h
index 92d15229ece..92d15229ece 100644
--- a/arch/tile/include/arch/opcode.h
+++ b/arch/tile/include/uapi/arch/opcode.h
diff --git a/arch/tile/include/arch/opcode_tilegx.h b/arch/tile/include/uapi/arch/opcode_tilegx.h
index c14d02c8160..d76ff2db745 100644
--- a/arch/tile/include/arch/opcode_tilegx.h
+++ b/arch/tile/include/uapi/arch/opcode_tilegx.h
@@ -61,6 +61,7 @@ typedef tilegx_bundle_bits tile_bundle_bits;
#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_BPT_BUNDLE TILEGX_BPT_BUNDLE
/* 64-bit pattern for a { bpt ; nop } bundle. */
#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL
diff --git a/arch/tile/include/arch/opcode_tilepro.h b/arch/tile/include/uapi/arch/opcode_tilepro.h
index 71b763b8ce8..4451cff1a86 100644
--- a/arch/tile/include/arch/opcode_tilepro.h
+++ b/arch/tile/include/uapi/arch/opcode_tilepro.h
@@ -71,6 +71,7 @@ typedef tilepro_bundle_bits tile_bundle_bits;
#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES
#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_BPT_BUNDLE TILEPRO_BPT_BUNDLE
/* 64-bit pattern for a { bpt ; nop } bundle. */
#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/uapi/arch/sim.h
index e54b7b0527f..e54b7b0527f 100644
--- a/arch/tile/include/arch/sim.h
+++ b/arch/tile/include/uapi/arch/sim.h
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/uapi/arch/sim_def.h
index 4b44a2b6a09..4b44a2b6a09 100644
--- a/arch/tile/include/arch/sim_def.h
+++ b/arch/tile/include/uapi/arch/sim_def.h
diff --git a/arch/tile/include/uapi/arch/spr_def.h b/arch/tile/include/uapi/arch/spr_def.h
new file mode 100644
index 00000000000..c250c5adb1a
--- /dev/null
+++ b/arch/tile/include/uapi/arch/spr_def.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI__ARCH_SPR_DEF_H__
+#define _UAPI__ARCH_SPR_DEF_H__
+
+/* Include the proper base SPR definition file. */
+#ifdef __tilegx__
+#include <arch/spr_def_64.h>
+#else
+#include <arch/spr_def_32.h>
+#endif
+
+
+#endif /* _UAPI__ARCH_SPR_DEF_H__ */
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h
index bbc1f4c924e..78daa3146d2 100644
--- a/arch/tile/include/arch/spr_def_32.h
+++ b/arch/tile/include/uapi/arch/spr_def_32.h
@@ -14,8 +14,8 @@
#ifndef __DOXYGEN__
-#ifndef __ARCH_SPR_DEF_H__
-#define __ARCH_SPR_DEF_H__
+#ifndef __ARCH_SPR_DEF_32_H__
+#define __ARCH_SPR_DEF_32_H__
#define SPR_AUX_PERF_COUNT_0 0x6005
#define SPR_AUX_PERF_COUNT_1 0x6006
@@ -65,6 +65,31 @@
#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
#define SPR_FAIL 0x4e09
+#define SPR_IDN_AVAIL_EN 0x3e05
+#define SPR_IDN_CA_DATA 0x0b00
+#define SPR_IDN_DATA_AVAIL 0x0b03
+#define SPR_IDN_DEADLOCK_TIMEOUT 0x3406
+#define SPR_IDN_DEMUX_CA_COUNT 0x0a05
+#define SPR_IDN_DEMUX_COUNT_0 0x0a06
+#define SPR_IDN_DEMUX_COUNT_1 0x0a07
+#define SPR_IDN_DEMUX_CTL 0x0a08
+#define SPR_IDN_DEMUX_QUEUE_SEL 0x0a0a
+#define SPR_IDN_DEMUX_STATUS 0x0a0b
+#define SPR_IDN_DEMUX_WRITE_FIFO 0x0a0c
+#define SPR_IDN_DIRECTION_PROTECT 0x2e05
+#define SPR_IDN_PENDING 0x0a0e
+#define SPR_IDN_REFILL_EN 0x0e05
+#define SPR_IDN_SP_FIFO_DATA 0x0a0f
+#define SPR_IDN_SP_FIFO_SEL 0x0a10
+#define SPR_IDN_SP_FREEZE 0x0a11
+#define SPR_IDN_SP_FREEZE__SP_FRZ_MASK 0x1
+#define SPR_IDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2
+#define SPR_IDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4
+#define SPR_IDN_SP_STATE 0x0a12
+#define SPR_IDN_TAG_0 0x0a13
+#define SPR_IDN_TAG_1 0x0a14
+#define SPR_IDN_TAG_VALID 0x0a15
+#define SPR_IDN_TILE_COORD 0x0a16
#define SPR_INTCTRL_0_STATUS 0x4a07
#define SPR_INTCTRL_1_STATUS 0x4807
#define SPR_INTCTRL_2_STATUS 0x4607
@@ -87,12 +112,36 @@
#define SPR_INTERRUPT_MASK_SET_1_1 0x480e
#define SPR_INTERRUPT_MASK_SET_2_0 0x460c
#define SPR_INTERRUPT_MASK_SET_2_1 0x460d
+#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x6000
+#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x6001
+#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x6002
#define SPR_MPL_DMA_CPL_SET_0 0x5800
#define SPR_MPL_DMA_CPL_SET_1 0x5801
#define SPR_MPL_DMA_CPL_SET_2 0x5802
#define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
#define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802
+#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00
+#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01
+#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02
+#define SPR_MPL_IDN_AVAIL_SET_0 0x3e00
+#define SPR_MPL_IDN_AVAIL_SET_1 0x3e01
+#define SPR_MPL_IDN_AVAIL_SET_2 0x3e02
+#define SPR_MPL_IDN_CA_SET_0 0x3a00
+#define SPR_MPL_IDN_CA_SET_1 0x3a01
+#define SPR_MPL_IDN_CA_SET_2 0x3a02
+#define SPR_MPL_IDN_COMPLETE_SET_0 0x1200
+#define SPR_MPL_IDN_COMPLETE_SET_1 0x1201
+#define SPR_MPL_IDN_COMPLETE_SET_2 0x1202
+#define SPR_MPL_IDN_FIREWALL_SET_0 0x2e00
+#define SPR_MPL_IDN_FIREWALL_SET_1 0x2e01
+#define SPR_MPL_IDN_FIREWALL_SET_2 0x2e02
+#define SPR_MPL_IDN_REFILL_SET_0 0x0e00
+#define SPR_MPL_IDN_REFILL_SET_1 0x0e01
+#define SPR_MPL_IDN_REFILL_SET_2 0x0e02
+#define SPR_MPL_IDN_TIMER_SET_0 0x3400
+#define SPR_MPL_IDN_TIMER_SET_1 0x3401
+#define SPR_MPL_IDN_TIMER_SET_2 0x3402
#define SPR_MPL_INTCTRL_0_SET_0 0x4a00
#define SPR_MPL_INTCTRL_0_SET_1 0x4a01
#define SPR_MPL_INTCTRL_0_SET_2 0x4a02
@@ -102,6 +151,9 @@
#define SPR_MPL_INTCTRL_2_SET_0 0x4600
#define SPR_MPL_INTCTRL_2_SET_1 0x4601
#define SPR_MPL_INTCTRL_2_SET_2 0x4602
+#define SPR_MPL_PERF_COUNT_SET_0 0x4200
+#define SPR_MPL_PERF_COUNT_SET_1 0x4201
+#define SPR_MPL_PERF_COUNT_SET_2 0x4202
#define SPR_MPL_SN_ACCESS_SET_0 0x0800
#define SPR_MPL_SN_ACCESS_SET_1 0x0801
#define SPR_MPL_SN_ACCESS_SET_2 0x0802
@@ -148,8 +200,6 @@
#define SPR_SIM_CONTROL 0x4e0c
#define SPR_SNCTL 0x0805
#define SPR_SNCTL__FRZFABRIC_MASK 0x1
-#define SPR_SNCTL__FRZPROC_MASK 0x2
-#define SPR_SNPC 0x080b
#define SPR_SNSTATIC 0x080c
#define SPR_SYSTEM_SAVE_0_0 0x4b00
#define SPR_SYSTEM_SAVE_0_1 0x4b01
@@ -181,6 +231,7 @@
#define SPR_UDN_DEMUX_STATUS 0x0c0d
#define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e
#define SPR_UDN_DIRECTION_PROTECT 0x3005
+#define SPR_UDN_PENDING 0x0c10
#define SPR_UDN_REFILL_EN 0x1005
#define SPR_UDN_SP_FIFO_DATA 0x0c11
#define SPR_UDN_SP_FIFO_SEL 0x0c12
@@ -195,7 +246,10 @@
#define SPR_UDN_TAG_3 0x0c18
#define SPR_UDN_TAG_VALID 0x0c19
#define SPR_UDN_TILE_COORD 0x0c1a
+#define SPR_WATCH_CTL 0x4209
+#define SPR_WATCH_MASK 0x420a
+#define SPR_WATCH_VAL 0x420b
-#endif /* !defined(__ARCH_SPR_DEF_H__) */
+#endif /* !defined(__ARCH_SPR_DEF_32_H__) */
#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/uapi/arch/spr_def_64.h
index cd3e5f95d5f..67a6c1751e3 100644
--- a/arch/tile/include/arch/spr_def_64.h
+++ b/arch/tile/include/uapi/arch/spr_def_64.h
@@ -14,8 +14,8 @@
#ifndef __DOXYGEN__
-#ifndef __ARCH_SPR_DEF_H__
-#define __ARCH_SPR_DEF_H__
+#ifndef __ARCH_SPR_DEF_64_H__
+#define __ARCH_SPR_DEF_64_H__
#define SPR_AUX_PERF_COUNT_0 0x2105
#define SPR_AUX_PERF_COUNT_1 0x2106
@@ -52,6 +52,13 @@
#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
#define SPR_FAIL 0x2707
+#define SPR_IDN_AVAIL_EN 0x1a05
+#define SPR_IDN_DATA_AVAIL 0x0a80
+#define SPR_IDN_DEADLOCK_TIMEOUT 0x1806
+#define SPR_IDN_DEMUX_COUNT_0 0x0a05
+#define SPR_IDN_DEMUX_COUNT_1 0x0a06
+#define SPR_IDN_DIRECTION_PROTECT 0x1405
+#define SPR_IDN_PENDING 0x0a08
#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
#define SPR_INTCTRL_0_STATUS 0x2505
#define SPR_INTCTRL_1_STATUS 0x2405
@@ -88,9 +95,27 @@
#define SPR_IPI_MASK_SET_0 0x1f0a
#define SPR_IPI_MASK_SET_1 0x1e0a
#define SPR_IPI_MASK_SET_2 0x1d0a
+#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x2100
+#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x2101
+#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x2102
#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
+#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00
+#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01
+#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02
+#define SPR_MPL_IDN_AVAIL_SET_0 0x1a00
+#define SPR_MPL_IDN_AVAIL_SET_1 0x1a01
+#define SPR_MPL_IDN_AVAIL_SET_2 0x1a02
+#define SPR_MPL_IDN_COMPLETE_SET_0 0x0500
+#define SPR_MPL_IDN_COMPLETE_SET_1 0x0501
+#define SPR_MPL_IDN_COMPLETE_SET_2 0x0502
+#define SPR_MPL_IDN_FIREWALL_SET_0 0x1400
+#define SPR_MPL_IDN_FIREWALL_SET_1 0x1401
+#define SPR_MPL_IDN_FIREWALL_SET_2 0x1402
+#define SPR_MPL_IDN_TIMER_SET_0 0x1800
+#define SPR_MPL_IDN_TIMER_SET_1 0x1801
+#define SPR_MPL_IDN_TIMER_SET_2 0x1802
#define SPR_MPL_INTCTRL_0_SET_0 0x2500
#define SPR_MPL_INTCTRL_0_SET_1 0x2501
#define SPR_MPL_INTCTRL_0_SET_2 0x2502
@@ -100,6 +125,21 @@
#define SPR_MPL_INTCTRL_2_SET_0 0x2300
#define SPR_MPL_INTCTRL_2_SET_1 0x2301
#define SPR_MPL_INTCTRL_2_SET_2 0x2302
+#define SPR_MPL_IPI_0 0x1f04
+#define SPR_MPL_IPI_0_SET_0 0x1f00
+#define SPR_MPL_IPI_0_SET_1 0x1f01
+#define SPR_MPL_IPI_0_SET_2 0x1f02
+#define SPR_MPL_IPI_1 0x1e04
+#define SPR_MPL_IPI_1_SET_0 0x1e00
+#define SPR_MPL_IPI_1_SET_1 0x1e01
+#define SPR_MPL_IPI_1_SET_2 0x1e02
+#define SPR_MPL_IPI_2 0x1d04
+#define SPR_MPL_IPI_2_SET_0 0x1d00
+#define SPR_MPL_IPI_2_SET_1 0x1d01
+#define SPR_MPL_IPI_2_SET_2 0x1d02
+#define SPR_MPL_PERF_COUNT_SET_0 0x2000
+#define SPR_MPL_PERF_COUNT_SET_1 0x2001
+#define SPR_MPL_PERF_COUNT_SET_2 0x2002
#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
@@ -167,7 +207,10 @@
#define SPR_UDN_DEMUX_COUNT_2 0x0b07
#define SPR_UDN_DEMUX_COUNT_3 0x0b08
#define SPR_UDN_DIRECTION_PROTECT 0x1505
+#define SPR_UDN_PENDING 0x0b0a
+#define SPR_WATCH_MASK 0x200a
+#define SPR_WATCH_VAL 0x200b
-#endif /* !defined(__ARCH_SPR_DEF_H__) */
+#endif /* !defined(__ARCH_SPR_DEF_64_H__) */
#endif /* !defined(__DOXYGEN__) */
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..c20db8e428b
--- /dev/null
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -0,0 +1,21 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += cachectl.h
+header-y += hardwall.h
+header-y += kvm_para.h
+header-y += mman.h
+header-y += ptrace.h
+header-y += setup.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += stat.h
+header-y += swab.h
+header-y += ucontext.h
+header-y += unistd.h
+
+generic-y += ucontext.h
diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h
index 1d393edb064..c93e92709f1 100644
--- a/arch/tile/include/asm/auxvec.h
+++ b/arch/tile/include/uapi/asm/auxvec.h
@@ -15,6 +15,7 @@
#ifndef _ASM_TILE_AUXVEC_H
#define _ASM_TILE_AUXVEC_H
-/* No extensions to auxvec */
+/* The vDSO location. */
+#define AT_SYSINFO_EHDR 33
#endif /* _ASM_TILE_AUXVEC_H */
diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/uapi/asm/bitsperlong.h
index 58c771f2af2..58c771f2af2 100644
--- a/arch/tile/include/asm/bitsperlong.h
+++ b/arch/tile/include/uapi/asm/bitsperlong.h
diff --git a/arch/tile/include/uapi/asm/byteorder.h b/arch/tile/include/uapi/asm/byteorder.h
new file mode 100644
index 00000000000..fb72ecf4921
--- /dev/null
+++ b/arch/tile/include/uapi/asm/byteorder.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if defined (__BIG_ENDIAN__)
+#include <linux/byteorder/big_endian.h>
+#elif defined (__LITTLE_ENDIAN__)
+#include <linux/byteorder/little_endian.h>
+#else
+#error "__BIG_ENDIAN__ or __LITTLE_ENDIAN__ must be defined."
+#endif
diff --git a/arch/tile/include/uapi/asm/cachectl.h b/arch/tile/include/uapi/asm/cachectl.h
new file mode 100644
index 00000000000..572ddcad209
--- /dev/null
+++ b/arch/tile/include/uapi/asm/cachectl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CACHECTL_H
+#define _ASM_TILE_CACHECTL_H
+
+/*
+ * Options for cacheflush system call.
+ *
+ * The ICACHE flush is performed on all cores currently running the
+ * current process's address space. The intent is for user
+ * applications to be able to modify code, invoke the system call,
+ * then allow arbitrary other threads in the same address space to see
+ * the newly-modified code. Passing a length of CHIP_L1I_CACHE_SIZE()
+ * or more invalidates the entire icache on all cores in the address
+ * spaces. (Note: currently this option invalidates the entire icache
+ * regardless of the requested address and length, but we may choose
+ * to honor the arguments at some point.)
+ *
+ * Flush and invalidation of memory can normally be performed with the
+ * __insn_flush() and __insn_finv() instructions from userspace.
+ * The DCACHE option to the system call allows userspace
+ * to flush the entire L1+L2 data cache from the core. In this case,
+ * the address and length arguments are not used. The DCACHE flush is
+ * restricted to the current core, not all cores in the address space.
+ */
+#define ICACHE (1<<0) /* invalidate L1 instruction cache */
+#define DCACHE (1<<1) /* flush and invalidate data cache */
+#define BCACHE (ICACHE|DCACHE) /* flush both caches */
+
+#endif /* _ASM_TILE_CACHECTL_H */
diff --git a/arch/tile/include/uapi/asm/hardwall.h b/arch/tile/include/uapi/asm/hardwall.h
new file mode 100644
index 00000000000..c2169d4f401
--- /dev/null
+++ b/arch/tile/include/uapi/asm/hardwall.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Provide methods for access control of per-cpu resources like
+ * UDN, IDN, or IPI.
+ */
+
+#ifndef _UAPI_ASM_TILE_HARDWALL_H
+#define _UAPI_ASM_TILE_HARDWALL_H
+
+#include <arch/chip.h>
+#include <linux/ioctl.h>
+
+#define HARDWALL_IOCTL_BASE 0xa2
+
+/*
+ * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
+ * The resulting ioctl value is passed to the kernel in conjunction
+ * with a pointer to a standard kernel bitmask of cpus.
+ * For network resources (UDN or IDN) the bitmask must physically
+ * represent a rectangular configuration on the chip.
+ * The "size" is the number of bytes of cpu mask data.
+ */
+#define _HARDWALL_CREATE 1
+#define HARDWALL_CREATE(size) \
+ _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size))
+
+#define _HARDWALL_ACTIVATE 2
+#define HARDWALL_ACTIVATE \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE)
+
+#define _HARDWALL_DEACTIVATE 3
+#define HARDWALL_DEACTIVATE \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
+
+#define _HARDWALL_GET_ID 4
+#define HARDWALL_GET_ID \
+ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
+
+
+#endif /* _UAPI_ASM_TILE_HARDWALL_H */
diff --git a/arch/tile/include/uapi/asm/kvm_para.h b/arch/tile/include/uapi/asm/kvm_para.h
new file mode 100644
index 00000000000..14fab8f0b95
--- /dev/null
+++ b/arch/tile/include/uapi/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/uapi/asm/mman.h
index 81b8fc348d6..81b8fc348d6 100644
--- a/arch/tile/include/asm/mman.h
+++ b/arch/tile/include/uapi/asm/mman.h
diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h
new file mode 100644
index 00000000000..7757e1985fb
--- /dev/null
+++ b/arch/tile/include/uapi/asm/ptrace.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_ASM_TILE_PTRACE_H
+#define _UAPI_ASM_TILE_PTRACE_H
+
+#include <arch/chip.h>
+#include <arch/abi.h>
+
+/* These must match struct pt_regs, below. */
+#if CHIP_WORD_SIZE() == 32
+#define PTREGS_OFFSET_REG(n) ((n)*4)
+#else
+#define PTREGS_OFFSET_REG(n) ((n)*8)
+#endif
+#define PTREGS_OFFSET_BASE 0
+#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53)
+#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54)
+#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55)
+#define PTREGS_NR_GPRS 56
+#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56)
+#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57)
+#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58)
+#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59)
+#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60)
+#if CHIP_HAS_CMPEXCH()
+#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61)
+#endif
+#define PTREGS_SIZE PTREGS_OFFSET_REG(64)
+
+
+#ifndef __ASSEMBLY__
+
+#ifndef __KERNEL__
+/* Provide appropriate length type to userspace regardless of -m32/-m64. */
+typedef uint_reg_t pt_reg_t;
+#endif
+
+/*
+ * This struct defines the way the registers are stored on the stack during a
+ * system call or exception. "struct sigcontext" has the same shape.
+ */
+struct pt_regs {
+ /* Saved main processor registers; 56..63 are special. */
+ /* tp, sp, and lr must immediately follow regs[] for aliasing. */
+ pt_reg_t regs[53];
+ pt_reg_t tp; /* aliases regs[TREG_TP] */
+ pt_reg_t sp; /* aliases regs[TREG_SP] */
+ pt_reg_t lr; /* aliases regs[TREG_LR] */
+
+ /* Saved special registers. */
+ pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */
+ pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
+ pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */
+ pt_reg_t orig_r0; /* r0 at syscall entry, else zero */
+ pt_reg_t flags; /* flags (see below) */
+#if !CHIP_HAS_CMPEXCH()
+ pt_reg_t pad[3];
+#else
+ pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */
+ pt_reg_t pad[2];
+#endif
+};
+
+#endif /* __ASSEMBLY__ */
+
+#define PTRACE_GETREGS 12
+#define PTRACE_SETREGS 13
+#define PTRACE_GETFPREGS 14
+#define PTRACE_SETFPREGS 15
+
+/* Support TILE-specific ptrace options, with events starting at 16. */
+#define PTRACE_EVENT_MIGRATE 16
+#define PTRACE_O_TRACEMIGRATE (1 << PTRACE_EVENT_MIGRATE)
+
+/*
+ * Flag bits in pt_regs.flags that are part of the ptrace API.
+ * We start our numbering higher up to avoid confusion with the
+ * non-ABI kernel-internal values that use the low 16 bits.
+ */
+#define PT_FLAGS_COMPAT 0x10000 /* process is an -m32 compat process */
+
+#endif /* _UAPI_ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/uapi/asm/setup.h
index 4fac5fbf333..e6f7da265ac 100644
--- a/arch/tile/include/asm/hw_irq.h
+++ b/arch/tile/include/uapi/asm/setup.h
@@ -12,7 +12,10 @@
* more details.
*/
-#ifndef _ASM_TILE_HW_IRQ_H
-#define _ASM_TILE_HW_IRQ_H
+#ifndef _UAPI_ASM_TILE_SETUP_H
+#define _UAPI_ASM_TILE_SETUP_H
-#endif /* _ASM_TILE_HW_IRQ_H */
+#define COMMAND_LINE_SIZE 2048
+
+
+#endif /* _UAPI_ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/uapi/asm/sigcontext.h
index 6348e59d372..6348e59d372 100644
--- a/arch/tile/include/asm/sigcontext.h
+++ b/arch/tile/include/uapi/asm/sigcontext.h
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/uapi/asm/siginfo.h
index 56d661bb010..56d661bb010 100644
--- a/arch/tile/include/asm/siginfo.h
+++ b/arch/tile/include/uapi/asm/siginfo.h
diff --git a/arch/tile/include/uapi/asm/signal.h b/arch/tile/include/uapi/asm/signal.h
new file mode 100644
index 00000000000..ef0d32d84a4
--- /dev/null
+++ b/arch/tile/include/uapi/asm/signal.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_ASM_TILE_SIGNAL_H
+#define _UAPI_ASM_TILE_SIGNAL_H
+
+/* Do not notify a ptracer when this signal is handled. */
+#define SA_NOPTRACE 0x02000000u
+
+/* Used in earlier Tilera releases, so keeping for binary compatibility. */
+#define SA_RESTORER 0x04000000u
+
+#include <asm-generic/signal.h>
+
+
+#endif /* _UAPI_ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/uapi/asm/stat.h
index c0db34d56be..c0db34d56be 100644
--- a/arch/tile/include/asm/stat.h
+++ b/arch/tile/include/uapi/asm/stat.h
diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/uapi/asm/swab.h
index 7c37b38f6c8..7c37b38f6c8 100644
--- a/arch/tile/include/asm/swab.h
+++ b/arch/tile/include/uapi/asm/swab.h
diff --git a/arch/tile/include/uapi/asm/unistd.h b/arch/tile/include/uapi/asm/unistd.h
new file mode 100644
index 00000000000..3866397aaf5
--- /dev/null
+++ b/arch/tile/include/uapi/asm/unistd.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if !defined(__LP64__) || defined(__SYSCALL_COMPAT)
+/* Use the flavor of this syscall that matches the 32-bit API better. */
+#define __ARCH_WANT_SYNC_FILE_RANGE2
+#endif
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+#define NR_syscalls __NR_syscalls
+
+/* Additional Tilera-specific syscalls. */
+#define __NR_cacheflush (__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_cacheflush, sys_cacheflush)
+
+#ifndef __tilegx__
+/* "Fast" syscalls provide atomic support for 32-bit chips. */
+#define __NR_FAST_cmpxchg -1
+#define __NR_FAST_atomic_update -2
+#define __NR_FAST_cmpxchg64 -3
+#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
+#endif
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index b4dbc057baa..21f77bf68c6 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -3,16 +3,34 @@
#
extra-y := vmlinux.lds head_$(BITS).o
-obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \
+obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \
pci-dma.o proc.o process.o ptrace.o reboot.o \
- setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
+ setup.o signal.o single_step.o stack.o sys.o \
+ sysfs.o time.o traps.o unaligned.o vdso.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
+endif
+
obj-$(CONFIG_HARDWALL) += hardwall.o
-obj-$(CONFIG_TILEGX) += futex_64.o
obj-$(CONFIG_COMPAT) += compat.o compat_signal.o
obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o
+ifdef CONFIG_TILEGX
+obj-$(CONFIG_PCI) += pci_gx.o
+else
obj-$(CONFIG_PCI) += pci.o
+endif
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_USE_PMC) += pmc.o
+obj-$(CONFIG_TILE_USB) += usb.o
+obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o
+obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KGDB) += kgdb.o
+
+obj-y += vdso/
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
index 01ddf19cc36..375e7c321ee 100644
--- a/arch/tile/kernel/asm-offsets.c
+++ b/arch/tile/kernel/asm-offsets.c
@@ -14,13 +14,6 @@
* Generates definitions from c-type structures used by assembly sources.
*/
-#include <linux/kbuild.h>
-#include <linux/thread_info.h>
-#include <linux/sched.h>
-#include <linux/hardirq.h>
-#include <linux/ptrace.h>
-#include <hv/hypervisor.h>
-
/* Check for compatible compiler early in the build. */
#ifdef CONFIG_TILEGX
# ifndef __tilegx__
@@ -31,46 +24,61 @@
# endif
#else
# ifdef __tilegx__
-# error Can not build TILEPro/TILE64 configurations with tilegx compiler
+# error Can not build TILEPro configurations with tilegx compiler
# endif
#endif
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <hv/hypervisor.h>
+
void foo(void)
{
- DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET,
offsetof(struct single_step_state, buffer));
- DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET,
offsetof(struct single_step_state, flags));
- DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET,
offsetof(struct single_step_state, orig_pc));
- DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET,
offsetof(struct single_step_state, next_pc));
- DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET,
offsetof(struct single_step_state, branch_next_pc));
- DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET,
offsetof(struct single_step_state, update_value));
- DEFINE(THREAD_INFO_TASK_OFFSET, \
+ DEFINE(THREAD_INFO_TASK_OFFSET,
offsetof(struct thread_info, task));
- DEFINE(THREAD_INFO_FLAGS_OFFSET, \
+ DEFINE(THREAD_INFO_FLAGS_OFFSET,
offsetof(struct thread_info, flags));
- DEFINE(THREAD_INFO_STATUS_OFFSET, \
+ DEFINE(THREAD_INFO_STATUS_OFFSET,
offsetof(struct thread_info, status));
- DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \
+ DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET,
offsetof(struct thread_info, homecache_cpu));
- DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \
+ DEFINE(THREAD_INFO_PREEMPT_COUNT_OFFSET,
+ offsetof(struct thread_info, preempt_count));
+ DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
offsetof(struct thread_info, step_state));
+#ifdef __tilegx__
+ DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET,
+ offsetof(struct thread_info, unalign_jit_base));
+ DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET,
+ offsetof(struct thread_info, unalign_jit_tmp));
+#endif
DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
offsetof(struct task_struct, thread.ksp));
DEFINE(TASK_STRUCT_THREAD_PC_OFFSET,
offsetof(struct task_struct, thread.pc));
- DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \
+ DEFINE(HV_TOPOLOGY_WIDTH_OFFSET,
offsetof(HV_Topology, width));
- DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \
+ DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET,
offsetof(HV_Topology, height));
- DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \
+ DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET,
offsetof(irq_cpustat_t, irq_syscall_count));
}
diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c
index 9092ce8aa6b..f8b74ca83b9 100644
--- a/arch/tile/kernel/backtrace.c
+++ b/arch/tile/kernel/backtrace.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/byteorder.h>
#include <asm/backtrace.h>
#include <asm/tile-desc.h>
#include <arch/abi.h>
@@ -336,8 +337,12 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location,
bytes_to_prefetch / sizeof(tile_bundle_bits);
}
- /* Decode the next bundle. */
- bundle.bits = prefetched_bundles[next_bundle++];
+ /*
+ * Decode the next bundle.
+ * TILE always stores instruction bundles in little-endian
+ * mode, even when the chip is running in big-endian mode.
+ */
+ bundle.bits = le64_to_cpu(prefetched_bundles[next_bundle++]);
bundle.num_insns =
parse_insn_tile(bundle.bits, pc, bundle.insns);
num_info_ops = bt_get_info_ops(&bundle, info_operands);
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index bf5e9d70266..49120843ff9 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -16,7 +16,6 @@
#define __SYSCALL_COMPAT
#include <linux/compat.h>
-#include <linux/msg.h>
#include <linux/syscalls.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
@@ -33,121 +32,69 @@
* adapt the usual convention.
*/
-long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high)
+COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy,
+ u32, low, u32, high)
{
return sys_truncate(filename, ((loff_t)high << 32) | low);
}
-long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high)
+COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy,
+ u32, low, u32, high)
{
return sys_ftruncate(fd, ((loff_t)high << 32) | low);
}
-long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
- u32 dummy, u32 low, u32 high)
+COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf,
+ size_t, count, u32, dummy, u32, low, u32, high)
{
return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low);
}
-long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
- u32 dummy, u32 low, u32 high)
+COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf,
+ size_t, count, u32, dummy, u32, low, u32, high)
{
return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low);
}
-long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len)
-{
- return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len);
-}
-
-long compat_sys_sync_file_range2(int fd, unsigned int flags,
- u32 offset_lo, u32 offset_hi,
- u32 nbytes_lo, u32 nbytes_hi)
+COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags,
+ u32, offset_lo, u32, offset_hi,
+ u32, nbytes_lo, u32, nbytes_hi)
{
return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo,
((loff_t)nbytes_hi << 32) | nbytes_lo,
flags);
}
-long compat_sys_fallocate(int fd, int mode,
- u32 offset_lo, u32 offset_hi,
- u32 len_lo, u32 len_hi)
+COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode,
+ u32, offset_lo, u32, offset_hi,
+ u32, len_lo, u32, len_hi)
{
return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo,
((loff_t)len_hi << 32) | len_lo);
}
-
-
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval)
-{
- struct timespec t;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret = sys_sched_rr_get_interval(pid,
- (struct timespec __force __user *)&t);
- set_fs(old_fs);
- if (put_compat_timespec(&t, interval))
- return -EFAULT;
- return ret;
-}
-
/*
- * The usual compat_sys_msgsnd() and _msgrcv() seem to be assuming
- * some different calling convention than our normal 32-bit tile code.
+ * Avoid bug in generic sys_llseek() that specifies offset_high and
+ * offset_low as "unsigned long", thus making it possible to pass
+ * a sign-extended high 32 bits in offset_low.
*/
-
-/* Already defined in ipc/compat.c, but we need it here. */
-struct compat_msgbuf {
- compat_long_t mtype;
- char mtext[1];
-};
-
-long tile_compat_sys_msgsnd(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, int msgflg)
+COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high,
+ unsigned int, offset_low, loff_t __user *, result,
+ unsigned int, origin)
{
- compat_long_t mtype;
-
- if (get_user(mtype, &msgp->mtype))
- return -EFAULT;
- return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
-}
-
-long tile_compat_sys_msgrcv(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, long msgtyp, int msgflg)
-{
- long err, mtype;
-
- err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
- if (err < 0)
- goto out;
-
- if (put_user(mtype, &msgp->mtype))
- err = -EFAULT;
- out:
- return err;
+ return sys_llseek(fd, offset_high, offset_low, result, origin);
}
/* Provide the compat syscall number to call mapping. */
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
-/* The generic versions of these don't work for Tile. */
-#define compat_sys_msgrcv tile_compat_sys_msgrcv
-#define compat_sys_msgsnd tile_compat_sys_msgsnd
-
/* See comments in sys.c */
#define compat_sys_fadvise64_64 sys32_fadvise64_64
#define compat_sys_readahead sys32_readahead
+#define sys_llseek compat_sys_llseek
-/* Call the trampolines to manage pt_regs where necessary. */
-#define compat_sys_execve _compat_sys_execve
-#define compat_sys_sigaltstack _compat_sys_sigaltstack
+/* Call the assembly trampolines where necessary. */
#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn
#define sys_clone _sys_clone
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 77763ccd5a7..19c04b5ce40 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -32,21 +32,9 @@
#include <asm/ucontext.h>
#include <asm/sigframe.h>
#include <asm/syscalls.h>
+#include <asm/vdso.h>
#include <arch/interrupts.h>
-struct compat_sigaction {
- compat_uptr_t sa_handler;
- compat_ulong_t sa_flags;
- compat_uptr_t sa_restorer;
- sigset_t sa_mask __packed;
-};
-
-struct compat_sigaltstack {
- compat_uptr_t ss_sp;
- int ss_flags;
- compat_size_t ss_size;
-};
-
struct compat_ucontext {
compat_ulong_t uc_flags;
compat_uptr_t uc_link;
@@ -55,129 +43,13 @@ struct compat_ucontext {
sigset_t uc_sigmask; /* mask last for extensibility */
};
-#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
-
-struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[COMPAT_SI_PAD_SIZE];
-
- /* kill() */
- struct {
- unsigned int _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- int _overrun_incr; /* amount to add to overrun */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- unsigned int _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- unsigned int _pid; /* which child */
- unsigned int _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- struct {
- unsigned int _addr; /* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
- int _trapno; /* TRAP # which caused the signal */
-#endif
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-};
-
struct compat_rt_sigframe {
unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
struct compat_siginfo info;
struct compat_ucontext uc;
};
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
- struct compat_sigaction __user *oact,
- size_t sigsetsize)
-{
- struct k_sigaction new_sa, old_sa;
- int ret = -EINVAL;
-
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- goto out;
-
- if (act) {
- compat_uptr_t handler, restorer;
-
- if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
- __get_user(handler, &act->sa_handler) ||
- __get_user(new_sa.sa.sa_flags, &act->sa_flags) ||
- __get_user(restorer, &act->sa_restorer) ||
- __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask,
- sizeof(sigset_t)))
- return -EFAULT;
- new_sa.sa.sa_handler = compat_ptr(handler);
- new_sa.sa.sa_restorer = compat_ptr(restorer);
- }
-
- ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
-
- if (!ret && oact) {
- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
- __put_user(ptr_to_compat(old_sa.sa.sa_handler),
- &oact->sa_handler) ||
- __put_user(ptr_to_compat(old_sa.sa.sa_restorer),
- &oact->sa_restorer) ||
- __put_user(old_sa.sa.sa_flags, &oact->sa_flags) ||
- __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask,
- sizeof(sigset_t)))
- return -EFAULT;
- }
-out:
- return ret;
-}
-
-long compat_sys_rt_sigqueueinfo(int pid, int sig,
- struct compat_siginfo __user *uinfo)
-{
- siginfo_t info;
- int ret;
- mm_segment_t old_fs = get_fs();
-
- if (copy_siginfo_from_user32(&info, uinfo))
- return -EFAULT;
- set_fs(KERNEL_DS);
- ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *)&info);
- set_fs(old_fs);
- return ret;
-}
-
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from)
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from)
{
int err;
@@ -255,44 +127,10 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
return err;
}
-long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
- struct compat_sigaltstack __user *uoss_ptr,
- struct pt_regs *regs)
-{
- stack_t uss, uoss;
- int ret;
- mm_segment_t seg;
-
- if (uss_ptr) {
- u32 ptr;
-
- memset(&uss, 0, sizeof(stack_t));
- if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) ||
- __get_user(ptr, &uss_ptr->ss_sp) ||
- __get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
- __get_user(uss.ss_size, &uss_ptr->ss_size))
- return -EFAULT;
- uss.ss_sp = compat_ptr(ptr);
- }
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = do_sigaltstack(uss_ptr ? (stack_t __user __force *)&uss : NULL,
- (stack_t __user __force *)&uoss,
- (unsigned long)compat_ptr(regs->sp));
- set_fs(seg);
- if (ret >= 0 && uoss_ptr) {
- if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) ||
- __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
- __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
- __put_user(uoss.ss_size, &uoss_ptr->ss_size))
- ret = -EFAULT;
- }
- return ret;
-}
-
/* The assembly shim for this function arranges to ignore the return value. */
-long compat_sys_rt_sigreturn(struct pt_regs *regs)
+long compat_sys_rt_sigreturn(void)
{
+ struct pt_regs *regs = current_pt_regs();
struct compat_rt_sigframe __user *frame =
(struct compat_rt_sigframe __user *) compat_ptr(regs->sp);
sigset_t set;
@@ -302,13 +140,12 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0)
+ if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return 0;
@@ -385,17 +222,13 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)),
- &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
- restorer = VDSO_BASE;
+ restorer = VDSO_SYM(&__vdso_rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ptr_to_compat_reg(ka->sa.sa_restorer);
@@ -403,28 +236,17 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* Set up registers for signal handler.
* Registers that we don't modify keep the value they had from
* user-space at the time we took the signal.
+ * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+ * since some things rely on this (e.g. glibc's debug/segfault.c).
*/
regs->pc = ptr_to_compat_reg(ka->sa.sa_handler);
regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */
regs->sp = ptr_to_compat_reg(frame);
regs->lr = restorer;
regs->regs[0] = (unsigned long) usig;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- /* Need extra arguments, so mark to restore caller-saves. */
- regs->regs[1] = ptr_to_compat_reg(&frame->info);
- regs->regs[2] = ptr_to_compat_reg(&frame->uc);
- regs->flags |= PT_FLAGS_CALLER_SAVES;
- }
-
- /*
- * Notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
+ regs->regs[1] = ptr_to_compat_reg(&frame->info);
+ regs->regs[2] = ptr_to_compat_reg(&frame->uc);
+ regs->flags |= PT_FLAGS_CALLER_SAVES;
return 0;
give_sigsegv:
diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
index 493a0e66d91..b608e00e7f6 100644
--- a/arch/tile/kernel/early_printk.c
+++ b/arch/tile/kernel/early_printk.c
@@ -16,41 +16,31 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
+#include <linux/irqflags.h>
+#include <linux/printk.h>
#include <asm/setup.h>
#include <hv/hypervisor.h>
static void early_hv_write(struct console *con, const char *s, unsigned n)
{
- hv_console_write((HV_VirtAddr) s, n);
+ tile_console_write(s, n);
+
+ /*
+ * Convert NL to NLCR (close enough to CRNL) during early boot.
+ * We assume newlines are at the ends of strings, which turns out
+ * to be good enough for early boot console output.
+ */
+ if (n && s[n-1] == '\n')
+ tile_console_write("\r", 1);
}
static struct console early_hv_console = {
.name = "earlyhv",
.write = early_hv_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_hv_console;
-static int early_console_initialized;
-static int early_console_complete;
-
-static void early_vprintk(const char *fmt, va_list ap)
-{
- char buf[512];
- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
-}
-
-void early_printk(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- early_vprintk(fmt, ap);
- va_end(ap);
-}
-
void early_panic(const char *fmt, ...)
{
va_list ap;
@@ -58,52 +48,21 @@ void early_panic(const char *fmt, ...)
va_start(ap, fmt);
early_printk("Kernel panic - not syncing: ");
early_vprintk(fmt, ap);
- early_console->write(early_console, "\n", 1);
+ early_printk("\n");
va_end(ap);
dump_stack();
hv_halt();
}
-static int __initdata keep_early;
-
static int __init setup_early_printk(char *str)
{
- if (early_console_initialized)
+ if (early_console)
return 1;
- if (str != NULL && strncmp(str, "keep", 4) == 0)
- keep_early = 1;
-
early_console = &early_hv_console;
- early_console_initialized = 1;
register_console(early_console);
return 0;
}
-void __init disable_early_printk(void)
-{
- early_console_complete = 1;
- if (!early_console_initialized || !early_console)
- return;
- if (!keep_early) {
- early_printk("disabling early console\n");
- unregister_console(early_console);
- early_console_initialized = 0;
- } else {
- early_printk("keeping early console\n");
- }
-}
-
-void warn_early_printk(void)
-{
- if (early_console_complete || early_console_initialized)
- return;
- early_printk("\
-Machine shutting down before console output is fully initialized.\n\
-You may wish to reboot and add the option 'earlyprintk' to your\n\
-boot command line to see any diagnostic early console output.\n\
-");
-}
-
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 431e9ae6048..3d9175992a2 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -27,33 +27,6 @@ STD_ENTRY(current_text_addr)
{ move r0, lr; jrp lr }
STD_ENDPROC(current_text_addr)
-/*
- * Implement execve(). The i386 code has a note that forking from kernel
- * space results in no copy on write until the execve, so we should be
- * careful not to write to the stack here.
- */
-STD_ENTRY(kernel_execve)
- moveli TREG_SYSCALL_NR_NAME, __NR_execve
- swint1
- jrp lr
- STD_ENDPROC(kernel_execve)
-
-/*
- * We don't run this function directly, but instead copy it to a page
- * we map into every user process. See vdso_setup().
- *
- * Note that libc has a copy of this function that it uses to compare
- * against the PC when a stack backtrace ends, so if this code is
- * changed, the libc implementation(s) should also be updated.
- */
- .pushsection .data
-ENTRY(__rt_sigreturn)
- moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn
- swint1
- ENDPROC(__rt_sigreturn)
- ENTRY(__rt_sigreturn_end)
- .popsection
-
STD_ENTRY(dump_stack)
{ move r2, lr; lnk r1 }
{ move r4, r52; addli r1, r1, dump_stack - . }
@@ -68,23 +41,10 @@ STD_ENTRY(KBacktraceIterator_init_current)
jrp lr /* keep backtracer happy */
STD_ENDPROC(KBacktraceIterator_init_current)
-/*
- * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then
- * free the old stack (passed in r0) and re-invoke cpu_idle().
- * We update sp and ksp0 simultaneously to avoid backtracer warnings.
- */
-STD_ENTRY(cpu_idle_on_new_stack)
- {
- move sp, r1
- mtspr SPR_SYSTEM_SAVE_K_0, r2
- }
- jal free_thread_info
- j cpu_idle
- STD_ENDPROC(cpu_idle_on_new_stack)
-
/* Loop forever on a nap during SMP boot. */
STD_ENTRY(smp_nap)
nap
+ nop /* avoid provoking the icache prefetch with a jump */
j smp_nap /* we are not architecturally guaranteed not to exit nap */
jrp lr /* clue in the backtracer */
STD_ENDPROC(smp_nap)
@@ -99,11 +59,13 @@ STD_ENTRY(smp_nap)
*/
STD_ENTRY(_cpu_idle)
movei r1, 1
+ IRQ_ENABLE_LOAD(r2, r3)
mtspr INTERRUPT_CRITICAL_SECTION, r1
- IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */
+ IRQ_ENABLE_APPLY(r2, r3) /* unmask, but still with ICS set */
mtspr INTERRUPT_CRITICAL_SECTION, zero
.global _cpu_idle_nap
_cpu_idle_nap:
nap
+ nop /* avoid provoking the icache prefetch with a jump */
jrp lr
STD_ENDPROC(_cpu_idle)
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
new file mode 100644
index 00000000000..8d52d83cc51
--- /dev/null
+++ b/arch/tile/kernel/ftrace.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx specific ftrace support
+ */
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/sections.h>
+
+#include <arch/opcode.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static inline tilegx_bundle_bits NOP(void)
+{
+ return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+ create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0) |
+ create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) |
+ create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) |
+ create_Opcode_X1(RRR_0_OPCODE_X1);
+}
+
+static int machine_stopped __read_mostly;
+
+int ftrace_arch_code_modify_prepare(void)
+{
+ machine_stopped = 1;
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ flush_icache_range(0, CHIP_L1I_CACHE_SIZE());
+ machine_stopped = 0;
+ return 0;
+}
+
+/*
+ * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic
+ * tracer just add one cycle overhead to every kernel function when disabled.
+ */
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+ bool link)
+{
+ tilegx_bundle_bits opcode_x0, opcode_x1;
+ long pcrel_by_instr = (addr - pc) >> TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES;
+
+ if (link) {
+ /* opcode: jal addr */
+ opcode_x1 =
+ create_Opcode_X1(JUMP_OPCODE_X1) |
+ create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) |
+ create_JumpOff_X1(pcrel_by_instr);
+ } else {
+ /* opcode: j addr */
+ opcode_x1 =
+ create_Opcode_X1(JUMP_OPCODE_X1) |
+ create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) |
+ create_JumpOff_X1(pcrel_by_instr);
+ }
+
+ if (addr == FTRACE_ADDR) {
+ /* opcode: or r10, lr, zero */
+ opcode_x0 =
+ create_Dest_X0(10) |
+ create_SrcA_X0(TREG_LR) |
+ create_SrcB_X0(TREG_ZERO) |
+ create_RRROpcodeExtension_X0(OR_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0);
+ } else {
+ /* opcode: fnop */
+ opcode_x0 =
+ create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+ create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0);
+ }
+
+ return opcode_x1 | opcode_x0;
+}
+
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
+{
+ return NOP();
+}
+
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+ return ftrace_gen_branch(pc, addr, true);
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned long old,
+ unsigned long new)
+{
+ unsigned long pc_wr;
+
+ /* Check if the address is in kernel text space and module space. */
+ if (!kernel_text_address(pc))
+ return -EINVAL;
+
+ /* Operate on writable kernel text mapping. */
+ pc_wr = pc - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)pc_wr, &new, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ smp_wmb();
+
+ if (!machine_stopped && num_online_cpus() > 1)
+ flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+ return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc, old;
+ unsigned long new;
+ int ret;
+
+ pc = (unsigned long)&ftrace_call;
+ memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(pc, (unsigned long)func);
+
+ ret = ftrace_modify_code(pc, old, new);
+
+ return ret;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long new, old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace(rec);
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned long old;
+ unsigned long new;
+ int ret;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace(rec);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long) &return_to_handler;
+ struct ftrace_graph_ent trace;
+ unsigned long old;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+ *parent = return_hooker;
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+ void (*func) (void), bool enable)
+{
+ unsigned long caller_fn = (unsigned long) func;
+ unsigned long pc = (unsigned long) callsite;
+ unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
+ unsigned long nop = NOP();
+ unsigned long old = enable ? nop : branch;
+ unsigned long new = enable ? branch : nop;
+
+ return ftrace_modify_code(pc, old, new);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+ int ret;
+
+ ret = __ftrace_modify_caller(&ftrace_graph_call,
+ ftrace_graph_caller,
+ enable);
+
+ return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S
deleted file mode 100644
index f465d1eda20..00000000000
--- a/arch/tile/kernel/futex_64.S
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * Atomically access user memory, but use MMU to avoid propagating
- * kernel exceptions.
- */
-
-#include <linux/linkage.h>
-#include <asm/errno.h>
-#include <asm/futex.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Provide a set of atomic memory operations supporting <asm/futex.h>.
- *
- * r0: user address to manipulate
- * r1: new value to write, or for cmpxchg, old value to compare against
- * r2: (cmpxchg only) new value to write
- *
- * Return __get_user struct, r0 with value, r1 with error.
- */
-#define FUTEX_OP(name, ...) \
-STD_ENTRY(futex_##name) \
- __VA_ARGS__; \
- { \
- move r1, zero; \
- jrp lr \
- }; \
- STD_ENDPROC(futex_##name); \
- .pushsection __ex_table,"a"; \
- .quad 1b, get_user_fault; \
- .popsection
-
- .pushsection .fixup,"ax"
-get_user_fault:
- { movei r1, -EFAULT; jrp lr }
- ENDPROC(get_user_fault)
- .popsection
-
-FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
-FUTEX_OP(set, 1: exch4 r0, r0, r1)
-FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
-FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
-FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 8c41891aab3..531f4c36535 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -33,59 +33,157 @@
/*
- * This data structure tracks the rectangle data, etc., associated
- * one-to-one with a "struct file *" from opening HARDWALL_FILE.
+ * Implement a per-cpu "hardwall" resource class such as UDN or IPI.
+ * We use "hardwall" nomenclature throughout for historical reasons.
+ * The lock here controls access to the list data structure as well as
+ * to the items on the list.
+ */
+struct hardwall_type {
+ int index;
+ int is_xdn;
+ int is_idn;
+ int disabled;
+ const char *name;
+ struct list_head list;
+ spinlock_t lock;
+ struct proc_dir_entry *proc_dir;
+};
+
+enum hardwall_index {
+ HARDWALL_UDN = 0,
+#ifndef __tilepro__
+ HARDWALL_IDN = 1,
+ HARDWALL_IPI = 2,
+#endif
+ _HARDWALL_TYPES
+};
+
+static struct hardwall_type hardwall_types[] = {
+ { /* user-space access to UDN */
+ 0,
+ 1,
+ 0,
+ 0,
+ "udn",
+ LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list),
+ __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_UDN].lock),
+ NULL
+ },
+#ifndef __tilepro__
+ { /* user-space access to IDN */
+ 1,
+ 1,
+ 1,
+ 1, /* disabled pending hypervisor support */
+ "idn",
+ LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list),
+ __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IDN].lock),
+ NULL
+ },
+ { /* access to user-space IPI */
+ 2,
+ 0,
+ 0,
+ 0,
+ "ipi",
+ LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list),
+ __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IPI].lock),
+ NULL
+ },
+#endif
+};
+
+/*
+ * This data structure tracks the cpu data, etc., associated
+ * one-to-one with a "struct file *" from opening a hardwall device file.
* Note that the file's private data points back to this structure.
*/
struct hardwall_info {
- struct list_head list; /* "rectangles" list */
+ struct list_head list; /* for hardwall_types.list */
struct list_head task_head; /* head of tasks in this hardwall */
- struct cpumask cpumask; /* cpus in the rectangle */
+ struct hardwall_type *type; /* type of this resource */
+ struct cpumask cpumask; /* cpus reserved */
+ int id; /* integer id for this hardwall */
+ int teardown_in_progress; /* are we tearing this one down? */
+
+ /* Remaining fields only valid for user-network resources. */
int ulhc_x; /* upper left hand corner x coord */
int ulhc_y; /* upper left hand corner y coord */
int width; /* rectangle width */
int height; /* rectangle height */
- int id; /* integer id for this hardwall */
- int teardown_in_progress; /* are we tearing this one down? */
+#if CHIP_HAS_REV1_XDN()
+ atomic_t xdn_pending_count; /* cores in phase 1 of drain */
+#endif
};
-/* Currently allocated hardwall rectangles */
-static LIST_HEAD(rectangles);
/* /proc/tile/hardwall */
static struct proc_dir_entry *hardwall_proc_dir;
/* Functions to manage files in /proc/tile/hardwall. */
-static void hardwall_add_proc(struct hardwall_info *rect);
-static void hardwall_remove_proc(struct hardwall_info *rect);
-
-/*
- * Guard changes to the hardwall data structures.
- * This could be finer grained (e.g. one lock for the list of hardwall
- * rectangles, then separate embedded locks for each one's list of tasks),
- * but there are subtle correctness issues when trying to start with
- * a task's "hardwall" pointer and lock the correct rectangle's embedded
- * lock in the presence of a simultaneous deactivation, so it seems
- * easier to have a single lock, given that none of these data
- * structures are touched very frequently during normal operation.
- */
-static DEFINE_SPINLOCK(hardwall_lock);
+static void hardwall_add_proc(struct hardwall_info *);
+static void hardwall_remove_proc(struct hardwall_info *);
/* Allow disabling UDN access. */
-static int udn_disabled;
static int __init noudn(char *str)
{
pr_info("User-space UDN access is disabled\n");
- udn_disabled = 1;
+ hardwall_types[HARDWALL_UDN].disabled = 1;
return 0;
}
early_param("noudn", noudn);
+#ifndef __tilepro__
+/* Allow disabling IDN access. */
+static int __init noidn(char *str)
+{
+ pr_info("User-space IDN access is disabled\n");
+ hardwall_types[HARDWALL_IDN].disabled = 1;
+ return 0;
+}
+early_param("noidn", noidn);
+
+/* Allow disabling IPI access. */
+static int __init noipi(char *str)
+{
+ pr_info("User-space IPI access is disabled\n");
+ hardwall_types[HARDWALL_IPI].disabled = 1;
+ return 0;
+}
+early_param("noipi", noipi);
+#endif
+
/*
- * Low-level primitives
+ * Low-level primitives for UDN/IDN
*/
+#ifdef __tilepro__
+#define mtspr_XDN(hwt, name, val) \
+ do { (void)(hwt); __insn_mtspr(SPR_UDN_##name, (val)); } while (0)
+#define mtspr_MPL_XDN(hwt, name, val) \
+ do { (void)(hwt); __insn_mtspr(SPR_MPL_UDN_##name, (val)); } while (0)
+#define mfspr_XDN(hwt, name) \
+ ((void)(hwt), __insn_mfspr(SPR_UDN_##name))
+#else
+#define mtspr_XDN(hwt, name, val) \
+ do { \
+ if ((hwt)->is_idn) \
+ __insn_mtspr(SPR_IDN_##name, (val)); \
+ else \
+ __insn_mtspr(SPR_UDN_##name, (val)); \
+ } while (0)
+#define mtspr_MPL_XDN(hwt, name, val) \
+ do { \
+ if ((hwt)->is_idn) \
+ __insn_mtspr(SPR_MPL_IDN_##name, (val)); \
+ else \
+ __insn_mtspr(SPR_MPL_UDN_##name, (val)); \
+ } while (0)
+#define mfspr_XDN(hwt, name) \
+ ((hwt)->is_idn ? __insn_mfspr(SPR_IDN_##name) : __insn_mfspr(SPR_UDN_##name))
+#endif
+
/* Set a CPU bit if the CPU is online. */
#define cpu_online_set(cpu, dst) do { \
if (cpu_online(cpu)) \
@@ -101,7 +199,7 @@ static int contains(struct hardwall_info *r, int x, int y)
}
/* Compute the rectangle parameters and validate the cpumask. */
-static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
+static int check_rectangle(struct hardwall_info *r, struct cpumask *mask)
{
int x, y, cpu, ulhc, lrhc;
@@ -114,8 +212,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
r->ulhc_y = cpu_y(ulhc);
r->width = cpu_x(lrhc) - r->ulhc_x + 1;
r->height = cpu_y(lrhc) - r->ulhc_y + 1;
- cpumask_copy(&r->cpumask, mask);
- r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */
/* Width and height must be positive */
if (r->width <= 0 || r->height <= 0)
@@ -128,7 +224,7 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
return -EINVAL;
/*
- * Note that offline cpus can't be drained when this UDN
+ * Note that offline cpus can't be drained when this user network
* rectangle eventually closes. We used to detect this
* situation and print a warning, but it annoyed users and
* they ignored it anyway, so now we just return without a
@@ -137,16 +233,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
return 0;
}
-/* Do the two given rectangles overlap on any cpu? */
-static int overlaps(struct hardwall_info *a, struct hardwall_info *b)
-{
- return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */
- b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */
- a->ulhc_y + a->height > b->ulhc_y && /* A not above */
- b->ulhc_y + b->height > a->ulhc_y; /* B not above */
-}
-
-
/*
* Hardware management of hardwall setup, teardown, trapping,
* and enabling/disabling PL0 access to the networks.
@@ -157,26 +243,38 @@ enum direction_protect {
N_PROTECT = (1 << 0),
E_PROTECT = (1 << 1),
S_PROTECT = (1 << 2),
- W_PROTECT = (1 << 3)
+ W_PROTECT = (1 << 3),
+ C_PROTECT = (1 << 4),
};
-static void enable_firewall_interrupts(void)
+static inline int xdn_which_interrupt(struct hardwall_type *hwt)
{
- arch_local_irq_unmask_now(INT_UDN_FIREWALL);
+#ifndef __tilepro__
+ if (hwt->is_idn)
+ return INT_IDN_FIREWALL;
+#endif
+ return INT_UDN_FIREWALL;
}
-static void disable_firewall_interrupts(void)
+static void enable_firewall_interrupts(struct hardwall_type *hwt)
{
- arch_local_irq_mask_now(INT_UDN_FIREWALL);
+ arch_local_irq_unmask_now(xdn_which_interrupt(hwt));
+}
+
+static void disable_firewall_interrupts(struct hardwall_type *hwt)
+{
+ arch_local_irq_mask_now(xdn_which_interrupt(hwt));
}
/* Set up hardwall on this cpu based on the passed hardwall_info. */
-static void hardwall_setup_ipi_func(void *info)
+static void hardwall_setup_func(void *info)
{
struct hardwall_info *r = info;
- int cpu = smp_processor_id();
- int x = cpu % smp_width;
- int y = cpu / smp_width;
+ struct hardwall_type *hwt = r->type;
+
+ int cpu = smp_processor_id(); /* on_each_cpu disables preemption */
+ int x = cpu_x(cpu);
+ int y = cpu_y(cpu);
int bits = 0;
if (x == r->ulhc_x)
bits |= W_PROTECT;
@@ -187,13 +285,12 @@ static void hardwall_setup_ipi_func(void *info)
if (y == r->ulhc_y + r->height - 1)
bits |= S_PROTECT;
BUG_ON(bits == 0);
- __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits);
- enable_firewall_interrupts();
-
+ mtspr_XDN(hwt, DIRECTION_PROTECT, bits);
+ enable_firewall_interrupts(hwt);
}
/* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */
-static void hardwall_setup(struct hardwall_info *r)
+static void hardwall_protect_rectangle(struct hardwall_info *r)
{
int x, y, cpu, delta;
struct cpumask rect_cpus;
@@ -217,37 +314,50 @@ static void hardwall_setup(struct hardwall_info *r)
}
/* Then tell all the cpus to set up their protection SPR */
- on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1);
+ on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1);
}
+/* Entered from INT_xDN_FIREWALL interrupt vector with irqs disabled. */
void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
{
struct hardwall_info *rect;
+ struct hardwall_type *hwt;
struct task_struct *p;
struct siginfo info;
- int x, y;
int cpu = smp_processor_id();
int found_processes;
- unsigned long flags;
-
struct pt_regs *old_regs = set_irq_regs(regs);
+
irq_enter();
+ /* Figure out which network trapped. */
+ switch (fault_num) {
+#ifndef __tilepro__
+ case INT_IDN_FIREWALL:
+ hwt = &hardwall_types[HARDWALL_IDN];
+ break;
+#endif
+ case INT_UDN_FIREWALL:
+ hwt = &hardwall_types[HARDWALL_UDN];
+ break;
+ default:
+ BUG();
+ }
+ BUG_ON(hwt->disabled);
+
/* This tile trapped a network access; find the rectangle. */
- x = cpu % smp_width;
- y = cpu / smp_width;
- spin_lock_irqsave(&hardwall_lock, flags);
- list_for_each_entry(rect, &rectangles, list) {
- if (contains(rect, x, y))
+ spin_lock(&hwt->lock);
+ list_for_each_entry(rect, &hwt->list, list) {
+ if (cpumask_test_cpu(cpu, &rect->cpumask))
break;
}
/*
* It shouldn't be possible not to find this cpu on the
* rectangle list, since only cpus in rectangles get hardwalled.
- * The hardwall is only removed after the UDN is drained.
+ * The hardwall is only removed after the user network is drained.
*/
- BUG_ON(&rect->list == &rectangles);
+ BUG_ON(&rect->list == &hwt->list);
/*
* If we already started teardown on this hardwall, don't worry;
@@ -255,30 +365,32 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
* to quiesce.
*/
if (rect->teardown_in_progress) {
- pr_notice("cpu %d: detected hardwall violation %#lx"
+ pr_notice("cpu %d: detected %s hardwall violation %#lx"
" while teardown already in progress\n",
- cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT));
+ cpu, hwt->name,
+ (long)mfspr_XDN(hwt, DIRECTION_PROTECT));
goto done;
}
/*
* Kill off any process that is activated in this rectangle.
* We bypass security to deliver the signal, since it must be
- * one of the activated processes that generated the UDN
+ * one of the activated processes that generated the user network
* message that caused this trap, and all the activated
* processes shared a single open file so are pretty tightly
* bound together from a security point of view to begin with.
*/
rect->teardown_in_progress = 1;
wmb(); /* Ensure visibility of rectangle before notifying processes. */
- pr_notice("cpu %d: detected hardwall violation %#lx...\n",
- cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT));
+ pr_notice("cpu %d: detected %s hardwall violation %#lx...\n",
+ cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT));
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_HARDWALL;
found_processes = 0;
- list_for_each_entry(p, &rect->task_head, thread.hardwall_list) {
- BUG_ON(p->thread.hardwall != rect);
+ list_for_each_entry(p, &rect->task_head,
+ thread.hardwall[hwt->index].list) {
+ BUG_ON(p->thread.hardwall[hwt->index].info != rect);
if (!(p->flags & PF_EXITING)) {
found_processes = 1;
pr_notice("hardwall: killing %d\n", p->pid);
@@ -289,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
pr_notice("hardwall: no associated processes!\n");
done:
- spin_unlock_irqrestore(&hardwall_lock, flags);
+ spin_unlock(&hwt->lock);
/*
* We have to disable firewall interrupts now, or else when we
@@ -298,48 +410,87 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
* haven't yet drained the network, and that would allow packets
* to cross out of the hardwall region.
*/
- disable_firewall_interrupts();
+ disable_firewall_interrupts(hwt);
irq_exit();
set_irq_regs(old_regs);
}
-/* Allow access from user space to the UDN. */
-void grant_network_mpls(void)
+/* Allow access from user space to the user network. */
+void grant_hardwall_mpls(struct hardwall_type *hwt)
{
- __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1);
- __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1);
- __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1);
- __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1);
+#ifndef __tilepro__
+ if (!hwt->is_xdn) {
+ __insn_mtspr(SPR_MPL_IPI_0_SET_0, 1);
+ return;
+ }
+#endif
+ mtspr_MPL_XDN(hwt, ACCESS_SET_0, 1);
+ mtspr_MPL_XDN(hwt, AVAIL_SET_0, 1);
+ mtspr_MPL_XDN(hwt, COMPLETE_SET_0, 1);
+ mtspr_MPL_XDN(hwt, TIMER_SET_0, 1);
#if !CHIP_HAS_REV1_XDN()
- __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1);
- __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1);
+ mtspr_MPL_XDN(hwt, REFILL_SET_0, 1);
+ mtspr_MPL_XDN(hwt, CA_SET_0, 1);
#endif
}
-/* Deny access from user space to the UDN. */
-void restrict_network_mpls(void)
+/* Deny access from user space to the user network. */
+void restrict_hardwall_mpls(struct hardwall_type *hwt)
{
- __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1);
- __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1);
- __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1);
- __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1);
+#ifndef __tilepro__
+ if (!hwt->is_xdn) {
+ __insn_mtspr(SPR_MPL_IPI_0_SET_1, 1);
+ return;
+ }
+#endif
+ mtspr_MPL_XDN(hwt, ACCESS_SET_1, 1);
+ mtspr_MPL_XDN(hwt, AVAIL_SET_1, 1);
+ mtspr_MPL_XDN(hwt, COMPLETE_SET_1, 1);
+ mtspr_MPL_XDN(hwt, TIMER_SET_1, 1);
#if !CHIP_HAS_REV1_XDN()
- __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1);
- __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1);
+ mtspr_MPL_XDN(hwt, REFILL_SET_1, 1);
+ mtspr_MPL_XDN(hwt, CA_SET_1, 1);
#endif
}
+/* Restrict or deny as necessary for the task we're switching to. */
+void hardwall_switch_tasks(struct task_struct *prev,
+ struct task_struct *next)
+{
+ int i;
+ for (i = 0; i < HARDWALL_TYPES; ++i) {
+ if (prev->thread.hardwall[i].info != NULL) {
+ if (next->thread.hardwall[i].info == NULL)
+ restrict_hardwall_mpls(&hardwall_types[i]);
+ } else if (next->thread.hardwall[i].info != NULL) {
+ grant_hardwall_mpls(&hardwall_types[i]);
+ }
+ }
+}
+
+/* Does this task have the right to IPI the given cpu? */
+int hardwall_ipi_valid(int cpu)
+{
+#ifdef __tilegx__
+ struct hardwall_info *info =
+ current->thread.hardwall[HARDWALL_IPI].info;
+ return info && cpumask_test_cpu(cpu, &info->cpumask);
+#else
+ return 0;
+#endif
+}
/*
- * Code to create, activate, deactivate, and destroy hardwall rectangles.
+ * Code to create, activate, deactivate, and destroy hardwall resources.
*/
-/* Create a hardwall for the given rectangle */
-static struct hardwall_info *hardwall_create(
- size_t size, const unsigned char __user *bits)
+/* Create a hardwall for the given resource */
+static struct hardwall_info *hardwall_create(struct hardwall_type *hwt,
+ size_t size,
+ const unsigned char __user *bits)
{
- struct hardwall_info *iter, *rect;
+ struct hardwall_info *iter, *info;
struct cpumask mask;
unsigned long flags;
int rc;
@@ -370,55 +521,70 @@ static struct hardwall_info *hardwall_create(
}
}
- /* Allocate a new rectangle optimistically. */
- rect = kmalloc(sizeof(struct hardwall_info),
+ /* Allocate a new hardwall_info optimistically. */
+ info = kmalloc(sizeof(struct hardwall_info),
GFP_KERNEL | __GFP_ZERO);
- if (rect == NULL)
+ if (info == NULL)
return ERR_PTR(-ENOMEM);
- INIT_LIST_HEAD(&rect->task_head);
+ INIT_LIST_HEAD(&info->task_head);
+ info->type = hwt;
/* Compute the rectangle size and validate that it's plausible. */
- rc = setup_rectangle(rect, &mask);
- if (rc != 0) {
- kfree(rect);
- return ERR_PTR(rc);
+ cpumask_copy(&info->cpumask, &mask);
+ info->id = find_first_bit(cpumask_bits(&mask), nr_cpumask_bits);
+ if (hwt->is_xdn) {
+ rc = check_rectangle(info, &mask);
+ if (rc != 0) {
+ kfree(info);
+ return ERR_PTR(rc);
+ }
}
+ /*
+ * Eliminate cpus that are not part of this Linux client.
+ * Note that this allows for configurations that we might not want to
+ * support, such as one client on every even cpu, another client on
+ * every odd cpu.
+ */
+ cpumask_and(&info->cpumask, &info->cpumask, cpu_online_mask);
+
/* Confirm it doesn't overlap and add it to the list. */
- spin_lock_irqsave(&hardwall_lock, flags);
- list_for_each_entry(iter, &rectangles, list) {
- if (overlaps(iter, rect)) {
- spin_unlock_irqrestore(&hardwall_lock, flags);
- kfree(rect);
+ spin_lock_irqsave(&hwt->lock, flags);
+ list_for_each_entry(iter, &hwt->list, list) {
+ if (cpumask_intersects(&iter->cpumask, &info->cpumask)) {
+ spin_unlock_irqrestore(&hwt->lock, flags);
+ kfree(info);
return ERR_PTR(-EBUSY);
}
}
- list_add_tail(&rect->list, &rectangles);
- spin_unlock_irqrestore(&hardwall_lock, flags);
+ list_add_tail(&info->list, &hwt->list);
+ spin_unlock_irqrestore(&hwt->lock, flags);
/* Set up appropriate hardwalling on all affected cpus. */
- hardwall_setup(rect);
+ if (hwt->is_xdn)
+ hardwall_protect_rectangle(info);
/* Create a /proc/tile/hardwall entry. */
- hardwall_add_proc(rect);
+ hardwall_add_proc(info);
- return rect;
+ return info;
}
/* Activate a given hardwall on this cpu for this process. */
-static int hardwall_activate(struct hardwall_info *rect)
+static int hardwall_activate(struct hardwall_info *info)
{
- int cpu, x, y;
+ int cpu;
unsigned long flags;
struct task_struct *p = current;
struct thread_struct *ts = &p->thread;
+ struct hardwall_type *hwt;
- /* Require a rectangle. */
- if (rect == NULL)
+ /* Require a hardwall. */
+ if (info == NULL)
return -ENODATA;
- /* Not allowed to activate a rectangle that is being torn down. */
- if (rect->teardown_in_progress)
+ /* Not allowed to activate a hardwall that is being torn down. */
+ if (info->teardown_in_progress)
return -EINVAL;
/*
@@ -428,78 +594,87 @@ static int hardwall_activate(struct hardwall_info *rect)
if (cpumask_weight(&p->cpus_allowed) != 1)
return -EPERM;
- /* Make sure we are bound to a cpu in this rectangle. */
+ /* Make sure we are bound to a cpu assigned to this resource. */
cpu = smp_processor_id();
BUG_ON(cpumask_first(&p->cpus_allowed) != cpu);
- x = cpu_x(cpu);
- y = cpu_y(cpu);
- if (!contains(rect, x, y))
+ if (!cpumask_test_cpu(cpu, &info->cpumask))
return -EINVAL;
/* If we are already bound to this hardwall, it's a no-op. */
- if (ts->hardwall) {
- BUG_ON(ts->hardwall != rect);
+ hwt = info->type;
+ if (ts->hardwall[hwt->index].info) {
+ BUG_ON(ts->hardwall[hwt->index].info != info);
return 0;
}
- /* Success! This process gets to use the user networks on this cpu. */
- ts->hardwall = rect;
- spin_lock_irqsave(&hardwall_lock, flags);
- list_add(&ts->hardwall_list, &rect->task_head);
- spin_unlock_irqrestore(&hardwall_lock, flags);
- grant_network_mpls();
- printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n",
- p->pid, p->comm, cpu);
+ /* Success! This process gets to use the resource on this cpu. */
+ ts->hardwall[hwt->index].info = info;
+ spin_lock_irqsave(&hwt->lock, flags);
+ list_add(&ts->hardwall[hwt->index].list, &info->task_head);
+ spin_unlock_irqrestore(&hwt->lock, flags);
+ grant_hardwall_mpls(hwt);
+ printk(KERN_DEBUG "Pid %d (%s) activated for %s hardwall: cpu %d\n",
+ p->pid, p->comm, hwt->name, cpu);
return 0;
}
/*
- * Deactivate a task's hardwall. Must hold hardwall_lock.
- * This method may be called from free_task(), so we don't want to
+ * Deactivate a task's hardwall. Must hold lock for hardwall_type.
+ * This method may be called from exit_thread(), so we don't want to
* rely on too many fields of struct task_struct still being valid.
* We assume the cpus_allowed, pid, and comm fields are still valid.
*/
-static void _hardwall_deactivate(struct task_struct *task)
+static void _hardwall_deactivate(struct hardwall_type *hwt,
+ struct task_struct *task)
{
struct thread_struct *ts = &task->thread;
if (cpumask_weight(&task->cpus_allowed) != 1) {
- pr_err("pid %d (%s) releasing networks with"
+ pr_err("pid %d (%s) releasing %s hardwall with"
" an affinity mask containing %d cpus!\n",
- task->pid, task->comm,
+ task->pid, task->comm, hwt->name,
cpumask_weight(&task->cpus_allowed));
BUG();
}
- BUG_ON(ts->hardwall == NULL);
- ts->hardwall = NULL;
- list_del(&ts->hardwall_list);
+ BUG_ON(ts->hardwall[hwt->index].info == NULL);
+ ts->hardwall[hwt->index].info = NULL;
+ list_del(&ts->hardwall[hwt->index].list);
if (task == current)
- restrict_network_mpls();
+ restrict_hardwall_mpls(hwt);
}
/* Deactivate a task's hardwall. */
-int hardwall_deactivate(struct task_struct *task)
+static int hardwall_deactivate(struct hardwall_type *hwt,
+ struct task_struct *task)
{
unsigned long flags;
int activated;
- spin_lock_irqsave(&hardwall_lock, flags);
- activated = (task->thread.hardwall != NULL);
+ spin_lock_irqsave(&hwt->lock, flags);
+ activated = (task->thread.hardwall[hwt->index].info != NULL);
if (activated)
- _hardwall_deactivate(task);
- spin_unlock_irqrestore(&hardwall_lock, flags);
+ _hardwall_deactivate(hwt, task);
+ spin_unlock_irqrestore(&hwt->lock, flags);
if (!activated)
return -EINVAL;
- printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n",
- task->pid, task->comm, smp_processor_id());
+ printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n",
+ task->pid, task->comm, hwt->name, raw_smp_processor_id());
return 0;
}
-/* Stop a UDN switch before draining the network. */
-static void stop_udn_switch(void *ignored)
+void hardwall_deactivate_all(struct task_struct *task)
+{
+ int i;
+ for (i = 0; i < HARDWALL_TYPES; ++i)
+ if (task->thread.hardwall[i].info)
+ hardwall_deactivate(&hardwall_types[i], task);
+}
+
+/* Stop the switch before draining the network. */
+static void stop_xdn_switch(void *arg)
{
#if !CHIP_HAS_REV1_XDN()
/* Freeze the switch and the demux. */
@@ -507,13 +682,71 @@ static void stop_udn_switch(void *ignored)
SPR_UDN_SP_FREEZE__SP_FRZ_MASK |
SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK |
SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK);
+#else
+ /*
+ * Drop all packets bound for the core or off the edge.
+ * We rely on the normal hardwall protection setup code
+ * to have set the low four bits to trigger firewall interrupts,
+ * and shift those bits up to trigger "drop on send" semantics,
+ * plus adding "drop on send to core" for all switches.
+ * In practice it seems the switches latch the DIRECTION_PROTECT
+ * SPR so they won't start dropping if they're already
+ * delivering the last message to the core, but it doesn't
+ * hurt to enable it here.
+ */
+ struct hardwall_type *hwt = arg;
+ unsigned long protect = mfspr_XDN(hwt, DIRECTION_PROTECT);
+ mtspr_XDN(hwt, DIRECTION_PROTECT, (protect | C_PROTECT) << 5);
#endif
}
+static void empty_xdn_demuxes(struct hardwall_type *hwt)
+{
+#ifndef __tilepro__
+ if (hwt->is_idn) {
+ while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 0))
+ (void) __tile_idn0_receive();
+ while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 1))
+ (void) __tile_idn1_receive();
+ return;
+ }
+#endif
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0))
+ (void) __tile_udn0_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1))
+ (void) __tile_udn1_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2))
+ (void) __tile_udn2_receive();
+ while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3))
+ (void) __tile_udn3_receive();
+}
+
/* Drain all the state from a stopped switch. */
-static void drain_udn_switch(void *ignored)
+static void drain_xdn_switch(void *arg)
{
-#if !CHIP_HAS_REV1_XDN()
+ struct hardwall_info *info = arg;
+ struct hardwall_type *hwt = info->type;
+
+#if CHIP_HAS_REV1_XDN()
+ /*
+ * The switches have been configured to drop any messages
+ * destined for cores (or off the edge of the rectangle).
+ * But the current message may continue to be delivered,
+ * so we wait until all the cores have finished any pending
+ * messages before we stop draining.
+ */
+ int pending = mfspr_XDN(hwt, PENDING);
+ while (pending--) {
+ empty_xdn_demuxes(hwt);
+ if (hwt->is_idn)
+ __tile_idn_send(0);
+ else
+ __tile_udn_send(0);
+ }
+ atomic_dec(&info->xdn_pending_count);
+ while (atomic_read(&info->xdn_pending_count))
+ empty_xdn_demuxes(hwt);
+#else
int i;
int from_tile_words, ca_count;
@@ -533,15 +766,7 @@ static void drain_udn_switch(void *ignored)
(void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO);
/* Empty out demuxes. */
- while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0))
- (void) __tile_udn0_receive();
- while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1))
- (void) __tile_udn1_receive();
- while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2))
- (void) __tile_udn2_receive();
- while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3))
- (void) __tile_udn3_receive();
- BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0);
+ empty_xdn_demuxes(hwt);
/* Empty out catch all. */
ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT);
@@ -563,21 +788,25 @@ static void drain_udn_switch(void *ignored)
#endif
}
-/* Reset random UDN state registers at boot up and during hardwall teardown. */
-void reset_network_state(void)
+/* Reset random XDN state registers at boot up and during hardwall teardown. */
+static void reset_xdn_network_state(struct hardwall_type *hwt)
{
-#if !CHIP_HAS_REV1_XDN()
- /* Reset UDN coordinates to their standard value */
- unsigned int cpu = smp_processor_id();
- unsigned int x = cpu % smp_width;
- unsigned int y = cpu / smp_width;
-#endif
-
- if (udn_disabled)
+ if (hwt->disabled)
return;
+ /* Clear out other random registers so we have a clean slate. */
+ mtspr_XDN(hwt, DIRECTION_PROTECT, 0);
+ mtspr_XDN(hwt, AVAIL_EN, 0);
+ mtspr_XDN(hwt, DEADLOCK_TIMEOUT, 0);
+
#if !CHIP_HAS_REV1_XDN()
- __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7));
+ /* Reset UDN coordinates to their standard value */
+ {
+ unsigned int cpu = smp_processor_id();
+ unsigned int x = cpu_x(cpu);
+ unsigned int y = cpu_y(cpu);
+ __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7));
+ }
/* Set demux tags to predefined values and enable them. */
__insn_mtspr(SPR_UDN_TAG_VALID, 0xf);
@@ -585,56 +814,50 @@ void reset_network_state(void)
__insn_mtspr(SPR_UDN_TAG_1, (1 << 1));
__insn_mtspr(SPR_UDN_TAG_2, (1 << 2));
__insn_mtspr(SPR_UDN_TAG_3, (1 << 3));
-#endif
- /* Clear out other random registers so we have a clean slate. */
- __insn_mtspr(SPR_UDN_AVAIL_EN, 0);
- __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0);
-#if !CHIP_HAS_REV1_XDN()
+ /* Set other rev0 random registers to a clean state. */
__insn_mtspr(SPR_UDN_REFILL_EN, 0);
__insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0);
__insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0);
-#endif
/* Start the switch and demux. */
-#if !CHIP_HAS_REV1_XDN()
__insn_mtspr(SPR_UDN_SP_FREEZE, 0);
#endif
}
-/* Restart a UDN switch after draining. */
-static void restart_udn_switch(void *ignored)
+void reset_network_state(void)
{
- reset_network_state();
-
- /* Disable firewall interrupts. */
- __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0);
- disable_firewall_interrupts();
+ reset_xdn_network_state(&hardwall_types[HARDWALL_UDN]);
+#ifndef __tilepro__
+ reset_xdn_network_state(&hardwall_types[HARDWALL_IDN]);
+#endif
}
-/* Build a struct cpumask containing all valid tiles in bounding rectangle. */
-static void fill_mask(struct hardwall_info *r, struct cpumask *result)
+/* Restart an XDN switch after draining. */
+static void restart_xdn_switch(void *arg)
{
- int x, y, cpu;
+ struct hardwall_type *hwt = arg;
- cpumask_clear(result);
+#if CHIP_HAS_REV1_XDN()
+ /* One last drain step to avoid races with injection and draining. */
+ empty_xdn_demuxes(hwt);
+#endif
- cpu = r->ulhc_y * smp_width + r->ulhc_x;
- for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) {
- for (x = 0; x < r->width; ++x, ++cpu)
- cpu_online_set(cpu, result);
- }
+ reset_xdn_network_state(hwt);
+
+ /* Disable firewall interrupts. */
+ disable_firewall_interrupts(hwt);
}
/* Last reference to a hardwall is gone, so clear the network. */
-static void hardwall_destroy(struct hardwall_info *rect)
+static void hardwall_destroy(struct hardwall_info *info)
{
struct task_struct *task;
+ struct hardwall_type *hwt;
unsigned long flags;
- struct cpumask mask;
- /* Make sure this file actually represents a rectangle. */
- if (rect == NULL)
+ /* Make sure this file actually represents a hardwall. */
+ if (info == NULL)
return;
/*
@@ -644,39 +867,53 @@ static void hardwall_destroy(struct hardwall_info *rect)
* deactivate any remaining tasks before freeing the
* hardwall_info object itself.
*/
- spin_lock_irqsave(&hardwall_lock, flags);
- list_for_each_entry(task, &rect->task_head, thread.hardwall_list)
- _hardwall_deactivate(task);
- spin_unlock_irqrestore(&hardwall_lock, flags);
-
- /* Drain the UDN. */
- printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n",
- rect->width, rect->height, rect->ulhc_x, rect->ulhc_y);
- fill_mask(rect, &mask);
- on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1);
- on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1);
+ hwt = info->type;
+ info->teardown_in_progress = 1;
+ spin_lock_irqsave(&hwt->lock, flags);
+ list_for_each_entry(task, &info->task_head,
+ thread.hardwall[hwt->index].list)
+ _hardwall_deactivate(hwt, task);
+ spin_unlock_irqrestore(&hwt->lock, flags);
+
+ if (hwt->is_xdn) {
+ /* Configure the switches for draining the user network. */
+ printk(KERN_DEBUG
+ "Clearing %s hardwall rectangle %dx%d %d,%d\n",
+ hwt->name, info->width, info->height,
+ info->ulhc_x, info->ulhc_y);
+ on_each_cpu_mask(&info->cpumask, stop_xdn_switch, hwt, 1);
+
+ /* Drain the network. */
+#if CHIP_HAS_REV1_XDN()
+ atomic_set(&info->xdn_pending_count,
+ cpumask_weight(&info->cpumask));
+ on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 0);
+#else
+ on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 1);
+#endif
- /* Restart switch and disable firewall. */
- on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1);
+ /* Restart switch and disable firewall. */
+ on_each_cpu_mask(&info->cpumask, restart_xdn_switch, hwt, 1);
+ }
/* Remove the /proc/tile/hardwall entry. */
- hardwall_remove_proc(rect);
-
- /* Now free the rectangle from the list. */
- spin_lock_irqsave(&hardwall_lock, flags);
- BUG_ON(!list_empty(&rect->task_head));
- list_del(&rect->list);
- spin_unlock_irqrestore(&hardwall_lock, flags);
- kfree(rect);
+ hardwall_remove_proc(info);
+
+ /* Now free the hardwall from the list. */
+ spin_lock_irqsave(&hwt->lock, flags);
+ BUG_ON(!list_empty(&info->task_head));
+ list_del(&info->list);
+ spin_unlock_irqrestore(&hwt->lock, flags);
+ kfree(info);
}
static int hardwall_proc_show(struct seq_file *sf, void *v)
{
- struct hardwall_info *rect = sf->private;
+ struct hardwall_info *info = sf->private;
char buf[256];
- int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask);
+ int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask);
buf[rc++] = '\n';
seq_write(sf, buf, rc);
return 0;
@@ -685,7 +922,7 @@ static int hardwall_proc_show(struct seq_file *sf, void *v)
static int hardwall_proc_open(struct inode *inode,
struct file *file)
{
- return single_open(file, hardwall_proc_show, PDE(inode)->data);
+ return single_open(file, hardwall_proc_show, PDE_DATA(inode));
}
static const struct file_operations hardwall_proc_fops = {
@@ -695,31 +932,45 @@ static const struct file_operations hardwall_proc_fops = {
.release = single_release,
};
-static void hardwall_add_proc(struct hardwall_info *rect)
+static void hardwall_add_proc(struct hardwall_info *info)
{
char buf[64];
- snprintf(buf, sizeof(buf), "%d", rect->id);
- proc_create_data(buf, 0444, hardwall_proc_dir,
- &hardwall_proc_fops, rect);
+ snprintf(buf, sizeof(buf), "%d", info->id);
+ proc_create_data(buf, 0444, info->type->proc_dir,
+ &hardwall_proc_fops, info);
}
-static void hardwall_remove_proc(struct hardwall_info *rect)
+static void hardwall_remove_proc(struct hardwall_info *info)
{
char buf[64];
- snprintf(buf, sizeof(buf), "%d", rect->id);
- remove_proc_entry(buf, hardwall_proc_dir);
+ snprintf(buf, sizeof(buf), "%d", info->id);
+ remove_proc_entry(buf, info->type->proc_dir);
}
int proc_pid_hardwall(struct task_struct *task, char *buffer)
{
- struct hardwall_info *rect = task->thread.hardwall;
- return rect ? sprintf(buffer, "%d\n", rect->id) : 0;
+ int i;
+ int n = 0;
+ for (i = 0; i < HARDWALL_TYPES; ++i) {
+ struct hardwall_info *info = task->thread.hardwall[i].info;
+ if (info)
+ n += sprintf(&buffer[n], "%s: %d\n",
+ info->type->name, info->id);
+ }
+ return n;
}
void proc_tile_hardwall_init(struct proc_dir_entry *root)
{
- if (!udn_disabled)
- hardwall_proc_dir = proc_mkdir("hardwall", root);
+ int i;
+ for (i = 0; i < HARDWALL_TYPES; ++i) {
+ struct hardwall_type *hwt = &hardwall_types[i];
+ if (hwt->disabled)
+ continue;
+ if (hardwall_proc_dir == NULL)
+ hardwall_proc_dir = proc_mkdir("hardwall", root);
+ hwt->proc_dir = proc_mkdir(hwt->name, hardwall_proc_dir);
+ }
}
@@ -729,34 +980,45 @@ void proc_tile_hardwall_init(struct proc_dir_entry *root)
static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b)
{
- struct hardwall_info *rect = file->private_data;
+ struct hardwall_info *info = file->private_data;
+ int minor = iminor(file->f_mapping->host);
+ struct hardwall_type* hwt;
if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE)
return -EINVAL;
+ BUILD_BUG_ON(HARDWALL_TYPES != _HARDWALL_TYPES);
+ BUILD_BUG_ON(HARDWALL_TYPES !=
+ sizeof(hardwall_types)/sizeof(hardwall_types[0]));
+
+ if (minor < 0 || minor >= HARDWALL_TYPES)
+ return -EINVAL;
+ hwt = &hardwall_types[minor];
+ WARN_ON(info && hwt != info->type);
+
switch (_IOC_NR(a)) {
case _HARDWALL_CREATE:
- if (udn_disabled)
+ if (hwt->disabled)
return -ENOSYS;
- if (rect != NULL)
+ if (info != NULL)
return -EALREADY;
- rect = hardwall_create(_IOC_SIZE(a),
- (const unsigned char __user *)b);
- if (IS_ERR(rect))
- return PTR_ERR(rect);
- file->private_data = rect;
+ info = hardwall_create(hwt, _IOC_SIZE(a),
+ (const unsigned char __user *)b);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+ file->private_data = info;
return 0;
case _HARDWALL_ACTIVATE:
- return hardwall_activate(rect);
+ return hardwall_activate(info);
case _HARDWALL_DEACTIVATE:
- if (current->thread.hardwall != rect)
+ if (current->thread.hardwall[hwt->index].info != info)
return -EINVAL;
- return hardwall_deactivate(current);
+ return hardwall_deactivate(hwt, current);
case _HARDWALL_GET_ID:
- return rect ? rect->id : -EINVAL;
+ return info ? info->id : -EINVAL;
default:
return -EINVAL;
@@ -775,26 +1037,28 @@ static long hardwall_compat_ioctl(struct file *file,
/* The user process closed the file; revoke access to user networks. */
static int hardwall_flush(struct file *file, fl_owner_t owner)
{
- struct hardwall_info *rect = file->private_data;
+ struct hardwall_info *info = file->private_data;
struct task_struct *task, *tmp;
unsigned long flags;
- if (rect) {
+ if (info) {
/*
* NOTE: if multiple threads are activated on this hardwall
* file, the other threads will continue having access to the
- * UDN until they are context-switched out and back in again.
+ * user network until they are context-switched out and back
+ * in again.
*
* NOTE: A NULL files pointer means the task is being torn
* down, so in that case we also deactivate it.
*/
- spin_lock_irqsave(&hardwall_lock, flags);
- list_for_each_entry_safe(task, tmp, &rect->task_head,
- thread.hardwall_list) {
+ struct hardwall_type *hwt = info->type;
+ spin_lock_irqsave(&hwt->lock, flags);
+ list_for_each_entry_safe(task, tmp, &info->task_head,
+ thread.hardwall[hwt->index].list) {
if (task->files == owner || task->files == NULL)
- _hardwall_deactivate(task);
+ _hardwall_deactivate(hwt, task);
}
- spin_unlock_irqrestore(&hardwall_lock, flags);
+ spin_unlock_irqrestore(&hwt->lock, flags);
}
return 0;
@@ -824,11 +1088,11 @@ static int __init dev_hardwall_init(void)
int rc;
dev_t dev;
- rc = alloc_chrdev_region(&dev, 0, 1, "hardwall");
+ rc = alloc_chrdev_region(&dev, 0, HARDWALL_TYPES, "hardwall");
if (rc < 0)
return rc;
cdev_init(&hardwall_dev, &dev_hardwall_fops);
- rc = cdev_add(&hardwall_dev, dev, 1);
+ rc = cdev_add(&hardwall_dev, dev, HARDWALL_TYPES);
if (rc < 0)
return rc;
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index 1a39b7c1c87..8d5b40ff292 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -38,13 +38,13 @@ ENTRY(_start)
movei r2, TILE_CHIP_REV
}
{
- moveli r0, _HV_VERSION
- jal hv_init
+ moveli r0, _HV_VERSION_OLD_HV_INIT
+ jal _hv_init
}
/* Get a reasonable default ASID in r0 */
{
move r0, zero
- jal hv_inquire_asid
+ jal _hv_inquire_asid
}
/* Install the default page table */
{
@@ -64,21 +64,21 @@ ENTRY(_start)
auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET)
}
{
- inv r6
+ finv r6
move r1, zero /* high 32 bits of CPA is zero */
}
{
moveli lr, lo16(1f)
- move r5, zero
+ moveli r5, CTX_PAGE_FLAG
}
{
auli lr, lr, ha16(1f)
- j hv_install_context
+ j _hv_install_context
}
1:
/* Get our processor number and save it away in SAVE_K_0. */
- jal hv_inquire_topology
+ jal _hv_inquire_topology
mulll_uu r4, r1, r2 /* r1 == y, r2 == width */
add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */
@@ -86,7 +86,7 @@ ENTRY(_start)
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
@@ -126,7 +126,6 @@ ENTRY(_start)
lw sp, r1
or r4, sp, r4
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
@@ -141,11 +140,11 @@ ENTRY(empty_zero_page)
.macro PTE va, cpa, bits1, no_org=0
.ifeq \no_org
- .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE
+ .org swapper_pg_dir + PGD_INDEX(\va) * HV_PTE_SIZE
.endif
.word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \
(HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
- .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32))
+ .word (\bits1) | (HV_CPA_TO_PTFN(\cpa) << (HV_PTE_INDEX_PTFN - 32))
.endm
__PAGE_ALIGNED_DATA
@@ -163,10 +162,10 @@ ENTRY(swapper_pg_dir)
.set addr, addr + PGDIR_SIZE
.endr
- /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
- PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+ /* The true text VAs are mapped as VA = PA + MEM_SV_START */
+ PTE MEM_SV_START, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
(1 << (HV_PTE_INDEX_EXECUTABLE - 32))
- .org swapper_pg_dir + HV_L1_SIZE
+ .org swapper_pg_dir + PGDIR_SIZE
END(swapper_pg_dir)
/*
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index 6bc3a932fe4..bd0e12f283f 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -25,6 +25,15 @@
#include <arch/chip.h>
#include <arch/spr_def.h>
+/* Extract two 32-bit bit values that were read into one register. */
+#ifdef __BIG_ENDIAN__
+#define GET_FIRST_INT(rd, rs) shrsi rd, rs, 32
+#define GET_SECOND_INT(rd, rs) addxi rd, rs, 0
+#else
+#define GET_FIRST_INT(rd, rs) addxi rd, rs, 0
+#define GET_SECOND_INT(rd, rs) shrsi rd, rs, 32
+#endif
+
/*
* This module contains the entry code for kernel images. It performs the
* minimal setup needed to call the generic C routines.
@@ -34,17 +43,23 @@
ENTRY(_start)
/* Notify the hypervisor of what version of the API we want */
{
+#if KERNEL_PL == 1 && _HV_VERSION == 13
+ /* Support older hypervisors by asking for API version 12. */
+ movei r0, _HV_VERSION_OLD_HV_INIT
+#else
+ movei r0, _HV_VERSION
+#endif
movei r1, TILE_CHIP
- movei r2, TILE_CHIP_REV
}
{
- moveli r0, _HV_VERSION
- jal hv_init
+ movei r2, TILE_CHIP_REV
+ movei r3, KERNEL_PL
}
+ jal _hv_init
/* Get a reasonable default ASID in r0 */
{
move r0, zero
- jal hv_inquire_asid
+ jal _hv_inquire_asid
}
/*
@@ -55,7 +70,7 @@ ENTRY(_start)
* other CPUs should see a properly-constructed page table.
*/
{
- v4int_l r2, zero, r0 /* ASID for hv_install_context */
+ GET_FIRST_INT(r2, r0) /* ASID for hv_install_context */
moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
}
{
@@ -71,7 +86,7 @@ ENTRY(_start)
{
/* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
- inv r4
+ finv r4
}
bnez r7, .Lno_write
{
@@ -114,30 +129,25 @@ ENTRY(_start)
shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
}
{
- move r3, zero
- j hv_install_context
+ moveli r3, CTX_PAGE_FLAG
+ j _hv_install_context
}
1:
/* Install the interrupt base. */
- moveli r0, hw2_last(MEM_SV_START)
- shl16insli r0, r0, hw1(MEM_SV_START)
- shl16insli r0, r0, hw0(MEM_SV_START)
+ moveli r0, hw2_last(intrpt_start)
+ shl16insli r0, r0, hw1(intrpt_start)
+ shl16insli r0, r0, hw0(intrpt_start)
mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0
- /*
- * Get our processor number and save it away in SAVE_K_0.
- * Extract stuff from the topology structure: r4 = y, r6 = x,
- * r5 = width. FIXME: consider whether we want to just make these
- * 64-bit values (and if so fix smp_topology write below, too).
- */
- jal hv_inquire_topology
+ /* Get our processor number and save it away in SAVE_K_0. */
+ jal _hv_inquire_topology
{
- v4int_l r5, zero, r1 /* r5 = width */
- shrui r4, r0, 32 /* r4 = y */
+ GET_FIRST_INT(r5, r1) /* r5 = width */
+ GET_SECOND_INT(r4, r0) /* r4 = y */
}
{
- v4int_l r6, zero, r0 /* r6 = x */
+ GET_FIRST_INT(r6, r0) /* r6 = x */
mul_lu_lu r4, r4, r5
}
{
@@ -148,7 +158,7 @@ ENTRY(_start)
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp with at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
@@ -192,9 +202,9 @@ ENTRY(_start)
}
ld r0, r0
ld sp, r1
- or r4, sp, r4
+ shli r4, r4, CPU_SHIFT
+ bfins r4, sp, 0, CPU_SHIFT-1
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
@@ -210,19 +220,19 @@ ENTRY(empty_zero_page)
.macro PTE cpa, bits1
.quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
- (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
+ (\bits1) | (HV_CPA_TO_PTFN(\cpa) << HV_PTE_INDEX_PTFN)
.endm
__PAGE_ALIGNED_DATA
.align PAGE_SIZE
ENTRY(swapper_pg_dir)
- .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
+ .org swapper_pg_dir + PGD_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
.Lsv_data_pmd:
.quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */
- .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE
+ .org swapper_pg_dir + PGD_INDEX(MEM_SV_START) * HV_PTE_SIZE
.Lsv_code_pmd:
.quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */
- .org swapper_pg_dir + HV_L0_SIZE
+ .org swapper_pg_dir + SIZEOF_PGD
END(swapper_pg_dir)
.align HV_PAGE_TABLE_ALIGN
@@ -233,11 +243,11 @@ ENTRY(temp_data_pmd)
* permissions later.
*/
.set addr, 0
- .rept HV_L1_ENTRIES
+ .rept PTRS_PER_PMD
PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
- .set addr, addr + HV_PAGE_SIZE_LARGE
+ .set addr, addr + HPAGE_SIZE
.endr
- .org temp_data_pmd + HV_L1_SIZE
+ .org temp_data_pmd + SIZEOF_PMD
END(temp_data_pmd)
.align HV_PAGE_TABLE_ALIGN
@@ -248,11 +258,11 @@ ENTRY(temp_code_pmd)
* permissions later.
*/
.set addr, 0
- .rept HV_L1_ENTRIES
+ .rept PTRS_PER_PMD
PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
- .set addr, addr + HV_PAGE_SIZE_LARGE
+ .set addr, addr + HPAGE_SIZE
.endr
- .org temp_code_pmd + HV_L1_SIZE
+ .org temp_code_pmd + SIZEOF_PMD
END(temp_code_pmd)
/*
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
new file mode 100644
index 00000000000..2ab45662239
--- /dev/null
+++ b/arch/tile/kernel/hvglue.S
@@ -0,0 +1,74 @@
+/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
+.macro gensym sym, val, size
+.org \val
+.global _\sym
+.type _\sym,function
+_\sym:
+.size _\sym,\size
+#ifndef CONFIG_TILE_HVGLUE_TRACE
+.globl \sym
+.set \sym,_\sym
+#endif
+.endm
+
+.section .hvglue,"x",@nobits
+.align 8
+gensym hv_init, 0x20, 32
+gensym hv_install_context, 0x40, 32
+gensym hv_sysconf, 0x60, 32
+gensym hv_get_rtc, 0x80, 32
+gensym hv_set_rtc, 0xa0, 32
+gensym hv_flush_asid, 0xc0, 32
+gensym hv_flush_page, 0xe0, 32
+gensym hv_flush_pages, 0x100, 32
+gensym hv_restart, 0x120, 32
+gensym hv_halt, 0x140, 32
+gensym hv_power_off, 0x160, 32
+gensym hv_inquire_physical, 0x180, 32
+gensym hv_inquire_memory_controller, 0x1a0, 32
+gensym hv_inquire_virtual, 0x1c0, 32
+gensym hv_inquire_asid, 0x1e0, 32
+gensym hv_nanosleep, 0x200, 32
+gensym hv_console_read_if_ready, 0x220, 32
+gensym hv_console_write, 0x240, 32
+gensym hv_downcall_dispatch, 0x260, 32
+gensym hv_inquire_topology, 0x280, 32
+gensym hv_fs_findfile, 0x2a0, 32
+gensym hv_fs_fstat, 0x2c0, 32
+gensym hv_fs_pread, 0x2e0, 32
+gensym hv_physaddr_read64, 0x300, 32
+gensym hv_physaddr_write64, 0x320, 32
+gensym hv_get_command_line, 0x340, 32
+gensym hv_set_caching, 0x360, 32
+gensym hv_bzero_page, 0x380, 32
+gensym hv_register_message_state, 0x3a0, 32
+gensym hv_send_message, 0x3c0, 32
+gensym hv_receive_message, 0x3e0, 32
+gensym hv_inquire_context, 0x400, 32
+gensym hv_start_all_tiles, 0x420, 32
+gensym hv_dev_open, 0x440, 32
+gensym hv_dev_close, 0x460, 32
+gensym hv_dev_pread, 0x480, 32
+gensym hv_dev_pwrite, 0x4a0, 32
+gensym hv_dev_poll, 0x4c0, 32
+gensym hv_dev_poll_cancel, 0x4e0, 32
+gensym hv_dev_preada, 0x500, 32
+gensym hv_dev_pwritea, 0x520, 32
+gensym hv_flush_remote, 0x540, 32
+gensym hv_console_putc, 0x560, 32
+gensym hv_inquire_tiles, 0x580, 32
+gensym hv_confstr, 0x5a0, 32
+gensym hv_reexec, 0x5c0, 32
+gensym hv_set_command_line, 0x5e0, 32
+gensym hv_clear_intr, 0x600, 32
+gensym hv_enable_intr, 0x620, 32
+gensym hv_disable_intr, 0x640, 32
+gensym hv_raise_intr, 0x660, 32
+gensym hv_trigger_ipi, 0x680, 32
+gensym hv_store_mapping, 0x6a0, 32
+gensym hv_inquire_realpa, 0x6c0, 32
+gensym hv_flush_all, 0x6e0, 32
+gensym hv_get_ipi_pte, 0x700, 32
+gensym hv_set_pte_super_shift, 0x720, 32
+gensym hv_console_set_ipi, 0x7e0, 32
+gensym hv_glue_internals, 0x800, 30720
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
deleted file mode 100644
index 2b7cd0a659a..00000000000
--- a/arch/tile/kernel/hvglue.lds
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
-hv_init = TEXT_OFFSET + 0x10020;
-hv_install_context = TEXT_OFFSET + 0x10040;
-hv_sysconf = TEXT_OFFSET + 0x10060;
-hv_get_rtc = TEXT_OFFSET + 0x10080;
-hv_set_rtc = TEXT_OFFSET + 0x100a0;
-hv_flush_asid = TEXT_OFFSET + 0x100c0;
-hv_flush_page = TEXT_OFFSET + 0x100e0;
-hv_flush_pages = TEXT_OFFSET + 0x10100;
-hv_restart = TEXT_OFFSET + 0x10120;
-hv_halt = TEXT_OFFSET + 0x10140;
-hv_power_off = TEXT_OFFSET + 0x10160;
-hv_inquire_physical = TEXT_OFFSET + 0x10180;
-hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0;
-hv_inquire_virtual = TEXT_OFFSET + 0x101c0;
-hv_inquire_asid = TEXT_OFFSET + 0x101e0;
-hv_nanosleep = TEXT_OFFSET + 0x10200;
-hv_console_read_if_ready = TEXT_OFFSET + 0x10220;
-hv_console_write = TEXT_OFFSET + 0x10240;
-hv_downcall_dispatch = TEXT_OFFSET + 0x10260;
-hv_inquire_topology = TEXT_OFFSET + 0x10280;
-hv_fs_findfile = TEXT_OFFSET + 0x102a0;
-hv_fs_fstat = TEXT_OFFSET + 0x102c0;
-hv_fs_pread = TEXT_OFFSET + 0x102e0;
-hv_physaddr_read64 = TEXT_OFFSET + 0x10300;
-hv_physaddr_write64 = TEXT_OFFSET + 0x10320;
-hv_get_command_line = TEXT_OFFSET + 0x10340;
-hv_set_caching = TEXT_OFFSET + 0x10360;
-hv_bzero_page = TEXT_OFFSET + 0x10380;
-hv_register_message_state = TEXT_OFFSET + 0x103a0;
-hv_send_message = TEXT_OFFSET + 0x103c0;
-hv_receive_message = TEXT_OFFSET + 0x103e0;
-hv_inquire_context = TEXT_OFFSET + 0x10400;
-hv_start_all_tiles = TEXT_OFFSET + 0x10420;
-hv_dev_open = TEXT_OFFSET + 0x10440;
-hv_dev_close = TEXT_OFFSET + 0x10460;
-hv_dev_pread = TEXT_OFFSET + 0x10480;
-hv_dev_pwrite = TEXT_OFFSET + 0x104a0;
-hv_dev_poll = TEXT_OFFSET + 0x104c0;
-hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0;
-hv_dev_preada = TEXT_OFFSET + 0x10500;
-hv_dev_pwritea = TEXT_OFFSET + 0x10520;
-hv_flush_remote = TEXT_OFFSET + 0x10540;
-hv_console_putc = TEXT_OFFSET + 0x10560;
-hv_inquire_tiles = TEXT_OFFSET + 0x10580;
-hv_confstr = TEXT_OFFSET + 0x105a0;
-hv_reexec = TEXT_OFFSET + 0x105c0;
-hv_set_command_line = TEXT_OFFSET + 0x105e0;
-hv_clear_intr = TEXT_OFFSET + 0x10600;
-hv_enable_intr = TEXT_OFFSET + 0x10620;
-hv_disable_intr = TEXT_OFFSET + 0x10640;
-hv_raise_intr = TEXT_OFFSET + 0x10660;
-hv_trigger_ipi = TEXT_OFFSET + 0x10680;
-hv_store_mapping = TEXT_OFFSET + 0x106a0;
-hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
-hv_flush_all = TEXT_OFFSET + 0x106e0;
-hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
-hv_glue_internals = TEXT_OFFSET + 0x10720;
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
new file mode 100644
index 00000000000..85c74ad2931
--- /dev/null
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Pull in the hypervisor header so we declare all the ABI functions
+ * with the underscore versions, then undef the names so that we can
+ * provide our own wrapper versions.
+ */
+#define hv_init _hv_init
+#define hv_install_context _hv_install_context
+#define hv_sysconf _hv_sysconf
+#define hv_get_rtc _hv_get_rtc
+#define hv_set_rtc _hv_set_rtc
+#define hv_flush_asid _hv_flush_asid
+#define hv_flush_page _hv_flush_page
+#define hv_flush_pages _hv_flush_pages
+#define hv_restart _hv_restart
+#define hv_halt _hv_halt
+#define hv_power_off _hv_power_off
+#define hv_inquire_physical _hv_inquire_physical
+#define hv_inquire_memory_controller _hv_inquire_memory_controller
+#define hv_inquire_virtual _hv_inquire_virtual
+#define hv_inquire_asid _hv_inquire_asid
+#define hv_nanosleep _hv_nanosleep
+#define hv_console_read_if_ready _hv_console_read_if_ready
+#define hv_console_write _hv_console_write
+#define hv_downcall_dispatch _hv_downcall_dispatch
+#define hv_inquire_topology _hv_inquire_topology
+#define hv_fs_findfile _hv_fs_findfile
+#define hv_fs_fstat _hv_fs_fstat
+#define hv_fs_pread _hv_fs_pread
+#define hv_physaddr_read64 _hv_physaddr_read64
+#define hv_physaddr_write64 _hv_physaddr_write64
+#define hv_get_command_line _hv_get_command_line
+#define hv_set_caching _hv_set_caching
+#define hv_bzero_page _hv_bzero_page
+#define hv_register_message_state _hv_register_message_state
+#define hv_send_message _hv_send_message
+#define hv_receive_message _hv_receive_message
+#define hv_inquire_context _hv_inquire_context
+#define hv_start_all_tiles _hv_start_all_tiles
+#define hv_dev_open _hv_dev_open
+#define hv_dev_close _hv_dev_close
+#define hv_dev_pread _hv_dev_pread
+#define hv_dev_pwrite _hv_dev_pwrite
+#define hv_dev_poll _hv_dev_poll
+#define hv_dev_poll_cancel _hv_dev_poll_cancel
+#define hv_dev_preada _hv_dev_preada
+#define hv_dev_pwritea _hv_dev_pwritea
+#define hv_flush_remote _hv_flush_remote
+#define hv_console_putc _hv_console_putc
+#define hv_inquire_tiles _hv_inquire_tiles
+#define hv_confstr _hv_confstr
+#define hv_reexec _hv_reexec
+#define hv_set_command_line _hv_set_command_line
+#define hv_clear_intr _hv_clear_intr
+#define hv_enable_intr _hv_enable_intr
+#define hv_disable_intr _hv_disable_intr
+#define hv_raise_intr _hv_raise_intr
+#define hv_trigger_ipi _hv_trigger_ipi
+#define hv_store_mapping _hv_store_mapping
+#define hv_inquire_realpa _hv_inquire_realpa
+#define hv_flush_all _hv_flush_all
+#define hv_get_ipi_pte _hv_get_ipi_pte
+#define hv_set_pte_super_shift _hv_set_pte_super_shift
+#define hv_console_set_ipi _hv_console_set_ipi
+#include <hv/hypervisor.h>
+#undef hv_init
+#undef hv_install_context
+#undef hv_sysconf
+#undef hv_get_rtc
+#undef hv_set_rtc
+#undef hv_flush_asid
+#undef hv_flush_page
+#undef hv_flush_pages
+#undef hv_restart
+#undef hv_halt
+#undef hv_power_off
+#undef hv_inquire_physical
+#undef hv_inquire_memory_controller
+#undef hv_inquire_virtual
+#undef hv_inquire_asid
+#undef hv_nanosleep
+#undef hv_console_read_if_ready
+#undef hv_console_write
+#undef hv_downcall_dispatch
+#undef hv_inquire_topology
+#undef hv_fs_findfile
+#undef hv_fs_fstat
+#undef hv_fs_pread
+#undef hv_physaddr_read64
+#undef hv_physaddr_write64
+#undef hv_get_command_line
+#undef hv_set_caching
+#undef hv_bzero_page
+#undef hv_register_message_state
+#undef hv_send_message
+#undef hv_receive_message
+#undef hv_inquire_context
+#undef hv_start_all_tiles
+#undef hv_dev_open
+#undef hv_dev_close
+#undef hv_dev_pread
+#undef hv_dev_pwrite
+#undef hv_dev_poll
+#undef hv_dev_poll_cancel
+#undef hv_dev_preada
+#undef hv_dev_pwritea
+#undef hv_flush_remote
+#undef hv_console_putc
+#undef hv_inquire_tiles
+#undef hv_confstr
+#undef hv_reexec
+#undef hv_set_command_line
+#undef hv_clear_intr
+#undef hv_enable_intr
+#undef hv_disable_intr
+#undef hv_raise_intr
+#undef hv_trigger_ipi
+#undef hv_store_mapping
+#undef hv_inquire_realpa
+#undef hv_flush_all
+#undef hv_get_ipi_pte
+#undef hv_set_pte_super_shift
+#undef hv_console_set_ipi
+
+/*
+ * Provide macros based on <linux/syscalls.h> to provide a wrapper
+ * function that invokes the same function with an underscore prefix.
+ * We can't use the existing __SC_xxx macros because we need to
+ * support up to nine arguments rather than up to six, and also this
+ * way the file stands alone from possible changes in the
+ * implementation of <linux/syscalls.h>.
+ */
+#define HV_WRAP0(type, name) \
+ type name(void); \
+ type name(void) \
+ { \
+ return _##name(); \
+ }
+#define __HV_DECL1(t1, a1) t1 a1
+#define __HV_DECL2(t2, a2, ...) t2 a2, __HV_DECL1(__VA_ARGS__)
+#define __HV_DECL3(t3, a3, ...) t3 a3, __HV_DECL2(__VA_ARGS__)
+#define __HV_DECL4(t4, a4, ...) t4 a4, __HV_DECL3(__VA_ARGS__)
+#define __HV_DECL5(t5, a5, ...) t5 a5, __HV_DECL4(__VA_ARGS__)
+#define __HV_DECL6(t6, a6, ...) t6 a6, __HV_DECL5(__VA_ARGS__)
+#define __HV_DECL7(t7, a7, ...) t7 a7, __HV_DECL6(__VA_ARGS__)
+#define __HV_DECL8(t8, a8, ...) t8 a8, __HV_DECL7(__VA_ARGS__)
+#define __HV_DECL9(t9, a9, ...) t9 a9, __HV_DECL8(__VA_ARGS__)
+#define __HV_PASS1(t1, a1) a1
+#define __HV_PASS2(t2, a2, ...) a2, __HV_PASS1(__VA_ARGS__)
+#define __HV_PASS3(t3, a3, ...) a3, __HV_PASS2(__VA_ARGS__)
+#define __HV_PASS4(t4, a4, ...) a4, __HV_PASS3(__VA_ARGS__)
+#define __HV_PASS5(t5, a5, ...) a5, __HV_PASS4(__VA_ARGS__)
+#define __HV_PASS6(t6, a6, ...) a6, __HV_PASS5(__VA_ARGS__)
+#define __HV_PASS7(t7, a7, ...) a7, __HV_PASS6(__VA_ARGS__)
+#define __HV_PASS8(t8, a8, ...) a8, __HV_PASS7(__VA_ARGS__)
+#define __HV_PASS9(t9, a9, ...) a9, __HV_PASS8(__VA_ARGS__)
+#define HV_WRAPx(x, type, name, ...) \
+ type name(__HV_DECL##x(__VA_ARGS__)); \
+ type name(__HV_DECL##x(__VA_ARGS__)) \
+ { \
+ return _##name(__HV_PASS##x(__VA_ARGS__)); \
+ }
+#define HV_WRAP1(type, name, ...) HV_WRAPx(1, type, name, __VA_ARGS__)
+#define HV_WRAP2(type, name, ...) HV_WRAPx(2, type, name, __VA_ARGS__)
+#define HV_WRAP3(type, name, ...) HV_WRAPx(3, type, name, __VA_ARGS__)
+#define HV_WRAP4(type, name, ...) HV_WRAPx(4, type, name, __VA_ARGS__)
+#define HV_WRAP5(type, name, ...) HV_WRAPx(5, type, name, __VA_ARGS__)
+#define HV_WRAP6(type, name, ...) HV_WRAPx(6, type, name, __VA_ARGS__)
+#define HV_WRAP7(type, name, ...) HV_WRAPx(7, type, name, __VA_ARGS__)
+#define HV_WRAP8(type, name, ...) HV_WRAPx(8, type, name, __VA_ARGS__)
+#define HV_WRAP9(type, name, ...) HV_WRAPx(9, type, name, __VA_ARGS__)
+
+/* List all the hypervisor API functions. */
+HV_WRAP4(void, hv_init, HV_VersionNumber, interface_version_number,
+ int, chip_num, int, chip_rev_num, int, client_pl)
+HV_WRAP1(long, hv_sysconf, HV_SysconfQuery, query)
+HV_WRAP3(int, hv_confstr, HV_ConfstrQuery, query, HV_VirtAddr, buf, int, len)
+#if CHIP_HAS_IPI()
+HV_WRAP3(int, hv_get_ipi_pte, HV_Coord, tile, int, pl, HV_PTE*, pte)
+HV_WRAP3(int, hv_console_set_ipi, int, ipi, int, event, HV_Coord, coord);
+#else
+HV_WRAP1(void, hv_enable_intr, HV_IntrMask, enab_mask)
+HV_WRAP1(void, hv_disable_intr, HV_IntrMask, disab_mask)
+HV_WRAP1(void, hv_clear_intr, HV_IntrMask, clear_mask)
+HV_WRAP1(void, hv_raise_intr, HV_IntrMask, raise_mask)
+HV_WRAP2(HV_Errno, hv_trigger_ipi, HV_Coord, tile, int, interrupt)
+#endif /* !CHIP_HAS_IPI() */
+HV_WRAP3(int, hv_store_mapping, HV_VirtAddr, va, unsigned int, len,
+ HV_PhysAddr, pa)
+HV_WRAP2(HV_PhysAddr, hv_inquire_realpa, HV_PhysAddr, cpa, unsigned int, len)
+HV_WRAP0(HV_RTCTime, hv_get_rtc)
+HV_WRAP1(void, hv_set_rtc, HV_RTCTime, time)
+HV_WRAP4(int, hv_install_context, HV_PhysAddr, page_table, HV_PTE, access,
+ HV_ASID, asid, __hv32, flags)
+HV_WRAP2(int, hv_set_pte_super_shift, int, level, int, log2_count)
+HV_WRAP0(HV_Context, hv_inquire_context)
+HV_WRAP1(int, hv_flush_asid, HV_ASID, asid)
+HV_WRAP2(int, hv_flush_page, HV_VirtAddr, address, HV_PageSize, page_size)
+HV_WRAP3(int, hv_flush_pages, HV_VirtAddr, start, HV_PageSize, page_size,
+ unsigned long, size)
+HV_WRAP1(int, hv_flush_all, int, preserve_global)
+HV_WRAP2(void, hv_restart, HV_VirtAddr, cmd, HV_VirtAddr, args)
+HV_WRAP0(void, hv_halt)
+HV_WRAP0(void, hv_power_off)
+HV_WRAP1(int, hv_reexec, HV_PhysAddr, entry)
+HV_WRAP0(HV_Topology, hv_inquire_topology)
+HV_WRAP3(HV_Errno, hv_inquire_tiles, HV_InqTileSet, set, HV_VirtAddr, cpumask,
+ int, length)
+HV_WRAP1(HV_PhysAddrRange, hv_inquire_physical, int, idx)
+HV_WRAP2(HV_MemoryControllerInfo, hv_inquire_memory_controller, HV_Coord, coord,
+ int, controller)
+HV_WRAP1(HV_VirtAddrRange, hv_inquire_virtual, int, idx)
+HV_WRAP1(HV_ASIDRange, hv_inquire_asid, int, idx)
+HV_WRAP1(void, hv_nanosleep, int, nanosecs)
+HV_WRAP0(int, hv_console_read_if_ready)
+HV_WRAP1(void, hv_console_putc, int, byte)
+HV_WRAP2(int, hv_console_write, HV_VirtAddr, bytes, int, len)
+HV_WRAP0(void, hv_downcall_dispatch)
+HV_WRAP1(int, hv_fs_findfile, HV_VirtAddr, filename)
+HV_WRAP1(HV_FS_StatInfo, hv_fs_fstat, int, inode)
+HV_WRAP4(int, hv_fs_pread, int, inode, HV_VirtAddr, buf,
+ int, length, int, offset)
+HV_WRAP2(unsigned long long, hv_physaddr_read64, HV_PhysAddr, addr,
+ HV_PTE, access)
+HV_WRAP3(void, hv_physaddr_write64, HV_PhysAddr, addr, HV_PTE, access,
+ unsigned long long, val)
+HV_WRAP2(int, hv_get_command_line, HV_VirtAddr, buf, int, length)
+HV_WRAP2(HV_Errno, hv_set_command_line, HV_VirtAddr, buf, int, length)
+HV_WRAP1(void, hv_set_caching, unsigned long, bitmask)
+HV_WRAP2(void, hv_bzero_page, HV_VirtAddr, va, unsigned int, size)
+HV_WRAP1(HV_Errno, hv_register_message_state, HV_MsgState*, msgstate)
+HV_WRAP4(int, hv_send_message, HV_Recipient *, recips, int, nrecip,
+ HV_VirtAddr, buf, int, buflen)
+HV_WRAP3(HV_RcvMsgInfo, hv_receive_message, HV_MsgState, msgstate,
+ HV_VirtAddr, buf, int, buflen)
+HV_WRAP0(void, hv_start_all_tiles)
+HV_WRAP2(int, hv_dev_open, HV_VirtAddr, name, __hv32, flags)
+HV_WRAP1(int, hv_dev_close, int, devhdl)
+HV_WRAP5(int, hv_dev_pread, int, devhdl, __hv32, flags, HV_VirtAddr, va,
+ __hv32, len, __hv64, offset)
+HV_WRAP5(int, hv_dev_pwrite, int, devhdl, __hv32, flags, HV_VirtAddr, va,
+ __hv32, len, __hv64, offset)
+HV_WRAP3(int, hv_dev_poll, int, devhdl, __hv32, events, HV_IntArg, intarg)
+HV_WRAP1(int, hv_dev_poll_cancel, int, devhdl)
+HV_WRAP6(int, hv_dev_preada, int, devhdl, __hv32, flags, __hv32, sgl_len,
+ HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg)
+HV_WRAP6(int, hv_dev_pwritea, int, devhdl, __hv32, flags, __hv32, sgl_len,
+ HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg)
+HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
+ unsigned long, cache_control, unsigned long*, cache_cpumask,
+ HV_VirtAddr, tlb_va, unsigned long, tlb_length,
+ unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
+ HV_Remote_ASID*, asids, int, asidcount)
diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c
deleted file mode 100644
index 928b3187066..00000000000
--- a/arch/tile/kernel/init_task.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/init_task.h>
-#include <linux/mqueue.h>
-#include <linux/module.h>
-#include <linux/start_kernel.h>
-#include <linux/uaccess.h>
-
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is THREAD_SIZE aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union __init_task_data = {
- INIT_THREAD_INFO(init_task)
-};
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-EXPORT_SYMBOL(init_task);
-
-/*
- * per-CPU stack and boot info.
- */
-DEFINE_PER_CPU(unsigned long, boot_sp) =
- (unsigned long)init_stack + THREAD_SIZE;
-
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
-#else
-/*
- * The variable must be __initdata since it references __init code.
- * With CONFIG_SMP it is per-cpu data, which is exempt from validation.
- */
-unsigned long __initdata boot_pc = (unsigned long)start_kernel;
-#endif
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index aecc8ed5f39..cdbda45a4e4 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -28,20 +28,10 @@
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
-#if !CHIP_HAS_WH64()
- /* By making this an empty macro, we can use wh64 in the code. */
- .macro wh64 reg
- .endm
-#endif
-
.macro push_reg reg, ptr=sp, delta=-4
{
sw \ptr, \reg
@@ -189,7 +179,7 @@ intvec_\vecname:
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, r0, zero, LOG2_THREAD_SIZE, 31
+ mm r0, r0, zero, LOG2_NR_CPU_IDS, 31
0:
/*
@@ -207,6 +197,9 @@ intvec_\vecname:
* cache line 1: r14...r29
* cache line 0: 2 x frame, r0..r13
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -320,24 +313,20 @@ intvec_\vecname:
movei r3, 0
}
.else
- .ifc \c_routine, op_handle_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
{
mfspr r2, PERF_COUNT_STS
movei r3, -1 /* not used, but set for consistency */
}
.else
-#if CHIP_HAS_AUX_PERF_COUNTERS()
- .ifc \c_routine, op_handle_aux_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
{
mfspr r2, AUX_PERF_COUNT_STS
movei r3, -1 /* not used, but set for consistency */
}
.else
-#endif
movei r3, 0
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.endif
-#endif
.endif
.endif
.endif
@@ -354,7 +343,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -468,7 +457,7 @@ intvec_\vecname:
}
{
auli r21, r21, ha16(__per_cpu_offset)
- mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+ mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
}
s2a r20, r20, r21
lw tp, r20
@@ -562,7 +551,6 @@ intvec_\vecname:
.endif
mtspr INTERRUPT_CRITICAL_SECTION, zero
-#if CHIP_HAS_WH64()
/*
* Prepare the first 256 stack bytes to be rapidly accessible
* without having to fetch the background data. We don't really
@@ -583,7 +571,6 @@ intvec_\vecname:
addi r52, r52, -64
}
wh64 r52
-#endif
#ifdef CONFIG_TRACE_IRQFLAGS
.ifnc \function,handle_nmi
@@ -762,7 +749,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -799,6 +786,10 @@ handle_interrupt:
* This routine takes a boolean in r30 indicating if this is an NMI.
* If so, we also expect a boolean in r31 indicating whether to
* re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
*/
STD_ENTRY(interrupt_return)
/* If we're resuming to kernel space, don't check thread flags. */
@@ -808,17 +799,37 @@ STD_ENTRY(interrupt_return)
}
lw r29, r29
andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ bzt r29, .Lresume_userspace
+
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ GET_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
{
- bzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ lw r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
}
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ lw r29, r29
+ }
+ bzt r28, 1f
+ bnz r29, 1f
+ /* Disable interrupts explicitly for preemption. */
+ IRQ_DISABLE(r20,r21)
+ TRACE_IRQS_OFF
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
{
- lw r28, r29
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
moveli r27, lo16(_cpu_idle_nap)
}
{
+ lw r28, r29
auli r27, r27, ha16(_cpu_idle_nap)
}
{
@@ -835,6 +846,18 @@ STD_ENTRY(interrupt_return)
FEEDBACK_REENTER(interrupt_return)
/*
+ * Use r33 to hold whether we have already loaded the callee-saves
+ * into ptregs. We don't want to do it twice in this loop, since
+ * then we'd clobber whatever changes are made by ptrace, etc.
+ * Get base of stack in r32.
+ */
+ {
+ GET_THREAD_INFO(r32)
+ movei r33, 0
+ }
+
+.Lretry_work_pending:
+ /*
* Disable interrupts so as to make sure we don't
* miss an interrupt that sets any of the thread flags (like
* need_resched or sigpending) between sampling and the iret.
@@ -844,9 +867,6 @@ STD_ENTRY(interrupt_return)
IRQ_DISABLE(r20, r21)
TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */
- /* Get base of stack in r32; note r30/31 are used as arguments here. */
- GET_THREAD_INFO(r32)
-
/* Check to see if there is any work to do before returning to user. */
{
@@ -862,16 +882,18 @@ STD_ENTRY(interrupt_return)
/*
* Make sure we have all the registers saved for signal
- * handling or single-step. Call out to C code to figure out
- * exactly what we need to do for each flag bit, then if
- * necessary, reload the flags and recheck.
+ * handling, notify-resume, or single-step. Call out to C
+ * code to figure out exactly what we need to do for each flag bit,
+ * then if necessary, reload the flags and recheck.
*/
- push_extra_callee_saves r0
{
PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
- jal do_work_pending
+ bnz r33, 1f
}
- bnz r0, .Lresume_userspace
+ push_extra_callee_saves r0
+ movei r33, 1
+1: jal do_work_pending
+ bnz r0, .Lretry_work_pending
/*
* In the NMI case we
@@ -924,6 +946,13 @@ STD_ENTRY(interrupt_return)
bzt r30, .Lrestore_regs
3:
+ /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */
+ {
+ moveli r0, lo16(1 << (INT_PERF_COUNT - 32))
+ bz r31, .Lrestore_regs
+ }
+ auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32))
+ mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0
/*
* We now commit to returning from this interrupt, since we will be
@@ -1149,6 +1178,10 @@ handle_nmi:
PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
}
FEEDBACK_REENTER(handle_nmi)
+ {
+ movei r30, 1
+ seq r31, r0, zero
+ }
j interrupt_return
STD_ENDPROC(handle_nmi)
@@ -1176,15 +1209,20 @@ handle_syscall:
add r20, r20, tp
lw r21, r20
addi r21, r21, 1
- sw r20, r21
+ {
+ sw r20, r21
+ GET_THREAD_INFO(r31)
+ }
/* Trace syscalls, if requested. */
- GET_THREAD_INFO(r31)
addi r31, r31, THREAD_INFO_FLAGS_OFFSET
lw r30, r31
andi r30, r30, _TIF_SYSCALL_TRACE
bzt r30, .Lrestore_syscall_regs
- jal do_syscall_trace
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_enter
+ }
FEEDBACK_REENTER(handle_syscall)
/*
@@ -1235,9 +1273,15 @@ handle_syscall:
lw r30, r31
andi r30, r30, _TIF_SYSCALL_TRACE
bzt r30, 1f
- jal do_syscall_trace
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_exit
+ }
FEEDBACK_REENTER(handle_syscall)
-1: j .Lresume_userspace /* jump into middle of interrupt_return */
+1: {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
.Linvalid_syscall:
/* Report an invalid syscall back to the user program */
@@ -1246,7 +1290,10 @@ handle_syscall:
movei r28, -ENOSYS
}
sw r29, r28
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(handle_syscall)
/* Return the address for oprofile to suppress in backtraces. */
@@ -1262,9 +1309,27 @@ STD_ENTRY(ret_from_fork)
jal sim_notify_fork
jal schedule_tail
FEEDBACK_REENTER(ret_from_fork)
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(ret_from_fork)
+STD_ENTRY(ret_from_kernel_thread)
+ jal sim_notify_fork
+ jal schedule_tail
+ FEEDBACK_REENTER(ret_from_fork)
+ {
+ move r0, r31
+ jalr r30
+ }
+ FEEDBACK_REENTER(ret_from_kernel_thread)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(ret_from_kernel_thread)
+
/*
* Code for ill interrupt.
*/
@@ -1349,7 +1414,10 @@ handle_ill:
3:
/* set PC and continue */
lw r26, r24
- sw r28, r26
+ {
+ sw r28, r26
+ GET_THREAD_INFO(r0)
+ }
/*
* Clear TIF_SINGLESTEP to prevent recursion if we execute an ill.
@@ -1357,7 +1425,6 @@ handle_ill:
* need to clear it here and can't really impose on all other arches.
* So what's another write between friends?
*/
- GET_THREAD_INFO(r0)
addi r1, r0, THREAD_INFO_FLAGS_OFFSET
{
@@ -1371,12 +1438,14 @@ handle_ill:
{
lw r0, r0 /* indirect thru thread_info to get task_info*/
addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
- move r2, zero /* load error code into r2 */
}
jal send_sigtrap /* issue a SIGTRAP */
FEEDBACK_REENTER(handle_ill)
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
.Ldispatch_normal_ill:
{
@@ -1406,15 +1475,6 @@ STD_ENTRY_LOCAL(bad_intr)
panic "Unhandled interrupt %#x: PC %#lx"
STD_ENDPROC(bad_intr)
-/* Put address of pt_regs in reg and jump. */
-#define PTREGS_SYSCALL(x, reg) \
- STD_ENTRY(_##x); \
- { \
- PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
- j x \
- }; \
- STD_ENDPROC(_##x)
-
/*
* Special-case sigreturn to not write r0 to the stack on return.
* This is technically more efficient, but it also avoids difficulties
@@ -1430,12 +1490,9 @@ STD_ENTRY_LOCAL(bad_intr)
}; \
STD_ENDPROC(_##x)
-PTREGS_SYSCALL(sys_execve, r3)
-PTREGS_SYSCALL(sys_sigaltstack, r2)
PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
-PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1)
-/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+/* Save additional callee-saves to pt_regs and jump to standard function. */
STD_ENTRY(_sys_clone)
push_extra_callee_saves r4
j sys_clone
@@ -1478,12 +1535,10 @@ STD_ENTRY(_sys_clone)
__HEAD
.align 64
/* Align much later jump on the start of a cache line. */
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
nop
#if PAGE_SIZE >= 0x10000
nop
#endif
-#endif
ENTRY(sys_cmpxchg)
/*
@@ -1517,45 +1572,6 @@ ENTRY(sys_cmpxchg)
# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
#endif
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- {
- /* Check for unaligned input. */
- bnz sp, .Lcmpxchg_badaddr
- mm r25, r0, zero, 3, PAGE_SHIFT-1
- }
- {
- crc32_32 r25, zero, r25
- moveli r21, lo16(atomic_lock_ptr)
- }
- {
- auli r21, r21, ha16(atomic_lock_ptr)
- auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
- }
- {
- shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
- slt_u r23, r0, r23
- lw r26, r0 /* see comment in the "#else" for the "lw r26". */
- }
- {
- s2a r21, r20, r21
- bbns r23, .Lcmpxchg_badaddr
- }
- {
- lw r21, r21
- seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
- andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
- }
- {
- /* Branch away at this point if we're doing a 64-bit cmpxchg. */
- bbs r23, .Lcmpxchg64
- andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
- }
- {
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
- j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
- }
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
{
/* Check for unaligned input. */
bnz sp, .Lcmpxchg_badaddr
@@ -1569,7 +1585,7 @@ ENTRY(sys_cmpxchg)
* Because of C pointer arithmetic, we want to compute this:
*
* ((char*)atomic_locks +
- * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2))
+ * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2))
*
* Instead of two shifts we just ">> 1", and use 'mm'
* to ignore the low and high bits we don't want.
@@ -1580,12 +1596,9 @@ ENTRY(sys_cmpxchg)
/*
* Ensure that the TLB is loaded before we take out the lock.
- * On tilepro, this will start fetching the value all the way
- * into our L1 as well (and if it gets modified before we
- * grab the lock, it will be invalidated from our cache
- * before we reload it). On tile64, we'll start fetching it
- * into our L1 if we're the home, and if we're not, we'll
- * still at least start fetching it into the home's L2.
+ * This will start fetching the value all the way into our L1
+ * as well (and if it gets modified before we grab the lock,
+ * it will be invalidated from our cache before we reload it).
*/
lw r26, r0
}
@@ -1628,8 +1641,6 @@ ENTRY(sys_cmpxchg)
j .Lcmpxchg32_tns
}
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* Symbol for do_page_fault_ics() to use to compare against the PC. */
.global __sys_cmpxchg_grab_lock
__sys_cmpxchg_grab_lock:
@@ -1767,9 +1778,6 @@ __sys_cmpxchg_grab_lock:
.align 64
.Lcmpxchg64:
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
-#endif
bzt r23, .Lcmpxchg64_tns
}
j .Lcmpxchg_badaddr
@@ -1835,11 +1843,12 @@ int_unalign:
push_extra_callee_saves r0
j do_trap
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
-#define op_handle_perf_interrupt bad_intr
-#define op_handle_aux_perf_interrupt bad_intr
+#ifndef CONFIG_USE_PMC
+#define handle_perf_interrupt bad_intr
+#endif
#ifndef CONFIG_HARDWALL
#define do_hardwall_trap bad_intr
@@ -1880,7 +1889,7 @@ int_unalign:
int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr
int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr
int_hand INT_PERF_COUNT, PERF_COUNT, \
- op_handle_perf_interrupt, handle_nmi
+ handle_perf_interrupt, handle_nmi
int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr
#if CONFIG_KERNEL_PL == 2
dc_dispatch INT_INTCTRL_2, INTCTRL_2
@@ -1904,10 +1913,8 @@ int_unalign:
do_page_fault
int_hand INT_SN_CPL, SN_CPL, bad_intr
int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
-#if CHIP_HAS_AUX_PERF_COUNTERS()
int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
- op_handle_aux_perf_interrupt, handle_nmi
-#endif
+ handle_perf_interrupt, handle_nmi
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 79c93e10ba2..5b67efcecab 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -17,24 +17,33 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/unistd.h>
+#include <linux/init.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/asm-offsets.h>
#include <asm/types.h>
+#include <asm/traps.h>
+#include <asm/signal.h>
#include <hv/hypervisor.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2
+/*
+ * Set "result" non-zero if ex1 holds the PL of the kernel
+ * (with or without ICS being set). Note this works only
+ * because we never find the PL at level 3.
+ */
+# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL
+#else
+# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL
+#endif
.macro push_reg reg, ptr=sp, delta=-8
{
@@ -97,6 +106,185 @@
}
.endm
+ /*
+ * Unalign data exception fast handling: In order to handle
+ * unaligned data access, a fast JIT version is generated and stored
+ * in a specific area in user space. We first need to do a quick poke
+ * to see if the JIT is available. We use certain bits in the fault
+ * PC (3 to 9 is used for 16KB page size) as index to address the JIT
+ * code area. The first 64bit word is the fault PC, and the 2nd one is
+ * the fault bundle itself. If these 2 words both match, then we
+ * directly "iret" to JIT code. If not, a slow path is invoked to
+ * generate new JIT code. Note: the current JIT code WILL be
+ * overwritten if it existed. So, ideally we can handle 128 unalign
+ * fixups via JIT. For lookup efficiency and to effectively support
+ * tight loops with multiple unaligned reference, a simple
+ * direct-mapped cache is used.
+ *
+ * SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
+ * SPR_EX_CONTEXT_K_1 has ICS set.
+ * SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
+ * SPR_EX_CONTEXT_0_1 = 0.
+ */
+ .macro int_hand_unalign_fast vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ /* Put r3 in SPR_SYSTEM_SAVE_K_1. */
+ mtspr SPR_SYSTEM_SAVE_K_1, r3
+
+ mfspr r3, SPR_EX_CONTEXT_K_1
+ /*
+ * Examine if exception comes from user without ICS set.
+ * If not, just go directly to the slow path.
+ */
+ bnez r3, hand_unalign_slow_nonuser
+
+ mfspr r3, SPR_SYSTEM_SAVE_K_0
+
+ /* Get &thread_info->unalign_jit_tmp[0] in r3. */
+ bfexts r3, r3, 0, CPU_SHIFT-1
+ mm r3, zero, LOG2_THREAD_SIZE, 63
+ addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
+
+ /*
+ * Save r0, r1, r2 into thread_info array r3 points to
+ * from low to high memory in order.
+ */
+ st_add r3, r0, 8
+ st_add r3, r1, 8
+ {
+ st_add r3, r2, 8
+ andi r2, sp, 7
+ }
+
+ /* Save stored r3 value so we can revert it on a page fault. */
+ mfspr r1, SPR_SYSTEM_SAVE_K_1
+ st r3, r1
+
+ {
+ /* Generate a SIGBUS if sp is not 8-byte aligned. */
+ bnez r2, hand_unalign_slow_badsp
+ }
+
+ /*
+ * Get the thread_info in r0; load r1 with pc. Set the low bit of sp
+ * as an indicator to the page fault code in case we fault.
+ */
+ {
+ ori sp, sp, 1
+ mfspr r1, SPR_EX_CONTEXT_K_0
+ }
+
+ /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
+ {
+ addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
+ (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
+ bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
+ }
+
+ /* Load the jit_info; multiply r2 by 128. */
+ {
+ ld r0, r0
+ shli r2, r2, UNALIGN_JIT_SHIFT
+ }
+
+ /*
+ * If r0 is NULL, the JIT page is not mapped, so go to slow path;
+ * add offset r2 to r0 at the same time.
+ */
+ {
+ beqz r0, hand_unalign_slow
+ add r2, r0, r2
+ }
+
+ /*
+ * We are loading from userspace (both the JIT info PC and
+ * instruction word, and the instruction word we executed)
+ * and since either could fault while holding the interrupt
+ * critical section, we must tag this region and check it in
+ * do_page_fault() to handle it properly.
+ */
+ENTRY(__start_unalign_asm_code)
+
+ /* Load first word of JIT in r0 and increment r2 by 8. */
+ ld_add r0, r2, 8
+
+ /*
+ * Compare the PC with the 1st word in JIT; load the fault bundle
+ * into r1.
+ */
+ {
+ cmpeq r0, r0, r1
+ ld r1, r1
+ }
+
+ /* Go to slow path if PC doesn't match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * Load the 2nd word of JIT, which is supposed to be the fault
+ * bundle for a cache hit. Increment r2; after this bundle r2 will
+ * point to the potential start of the JIT code we want to run.
+ */
+ ld_add r0, r2, 8
+
+ /* No further accesses to userspace are done after this point. */
+ENTRY(__end_unalign_asm_code)
+
+ /* Compare the real bundle with what is saved in the JIT area. */
+ {
+ cmpeq r0, r1, r0
+ mtspr SPR_EX_CONTEXT_0_1, zero
+ }
+
+ /* Go to slow path if the fault bundle does not match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * A cache hit is found.
+ * r2 points to start of JIT code (3rd word).
+ * r0 is the fault pc.
+ * r1 is the fault bundle.
+ * Reset the low bit of sp.
+ */
+ {
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ andi sp, sp, ~1
+ }
+
+ /* Write r2 into EX_CONTEXT_K_0 and increment PC. */
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r2
+ addi r0, r0, 8
+ }
+
+ /*
+ * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
+ * user with ICS set. This way, if the JIT fixup causes another
+ * unalign exception (which shouldn't be possible) the user
+ * process will be terminated with SIGBUS. Also, our fixup will
+ * run without interleaving with external interrupts.
+ * Each fixup is at most 14 bundles, so it won't hold ICS for long.
+ */
+ {
+ movei r1, PL_ICS_EX1(USER_PL, 1)
+ mtspr SPR_EX_CONTEXT_0_0, r0
+ }
+
+ {
+ mtspr SPR_EX_CONTEXT_K_1, r1
+ addi r3, r3, -(3 * 8)
+ }
+
+ /* Restore r0..r3. */
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld_add r2, r3, 8
+ ld r3, r3
+
+ iret
+ ENDPROC(intvec_\vecname)
+ .endm
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
@@ -117,15 +305,21 @@ intvec_feedback:
* The "processing" argument specifies the code for processing
* the interrupt. Defaults to "handle_interrupt".
*/
- .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
- .org (\vecnum << 8)
+ .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
intvec_\vecname:
/* Temporarily save a register so we have somewhere to work. */
mtspr SPR_SYSTEM_SAVE_K_1, r0
mfspr r0, SPR_EX_CONTEXT_K_1
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ /*
+ * The unalign data fastpath code sets the low bit in sp to
+ * force us to reset it here on fault.
+ */
+ {
+ blbs sp, 2f
+ IS_KERNEL_EX1(r0, r0)
+ }
.ifc \vecnum, INT_DOUBLE_FAULT
/*
@@ -175,15 +369,15 @@ intvec_\vecname:
}
.endif
-
+2:
/*
- * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
- * the current stack top in the higher bits. So we recover
- * our stack top by just masking off the low bits, then
+ * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+ * the current stack top in the lower bits. So we recover
+ * our starting stack value by sign-extending the low bits, then
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, zero, LOG2_THREAD_SIZE, 63
+ bfexts r0, r0, 0, CPU_SHIFT-1
0:
/*
@@ -205,6 +399,9 @@ intvec_\vecname:
* cache line 1: r6...r13
* cache line 0: 2 x frame, r0..r5
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -219,7 +416,9 @@ intvec_\vecname:
* This routine saves just the first four registers, plus the
* stack context so we can do proper backtracing right away,
* and defers to handle_interrupt to save the rest.
- * The backtracer needs pc, ex1, lr, sp, r52, and faultnum.
+ * The backtracer needs pc, ex1, lr, sp, r52, and faultnum,
+ * and needs sp set to its final location at the bottom of
+ * the stack frame.
*/
addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP)
wh64 r0 /* cache line 7 */
@@ -302,7 +501,7 @@ intvec_\vecname:
mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */
.else
.ifc \vecnum, INT_ILL_TRANS
- mfspr r2, ILL_TRANS_REASON
+ mfspr r2, ILL_VA_PC
.else
.ifc \vecnum, INT_DOUBLE_FAULT
mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */
@@ -310,14 +509,12 @@ intvec_\vecname:
.ifc \c_routine, do_trap
mfspr r2, GPV_REASON
.else
- .ifc \c_routine, op_handle_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
mfspr r2, PERF_COUNT_STS
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.else
- .ifc \c_routine, op_handle_aux_perf_interrupt
+ .ifc \c_routine, handle_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
-#endif
.endif
.endif
.endif
@@ -336,7 +533,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -449,31 +646,15 @@ intvec_\vecname:
push_reg r5, r52
st r52, r4
- /* Load tp with our per-cpu offset. */
-#ifdef CONFIG_SMP
- {
- mfspr r20, SPR_SYSTEM_SAVE_K_0
- moveli r21, hw2_last(__per_cpu_offset)
- }
- {
- shl16insli r21, r21, hw1(__per_cpu_offset)
- bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
- }
- shl16insli r21, r21, hw0(__per_cpu_offset)
- shl3add r20, r20, r21
- ld tp, r20
-#else
- move tp, zero
-#endif
-
/*
* If we will be returning to the kernel, we will need to
* reset the interrupt masks to the state they had before.
- * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ * Set DISABLE_IRQ in flags iff we came from kernel pl with
+ * irqs disabled.
*/
mfspr r32, SPR_EX_CONTEXT_K_1
{
- andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r22, r22)
PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
}
beqzt r32, 1f /* zero if from user space */
@@ -488,6 +669,44 @@ intvec_\vecname:
.endif
st r21, r32
+ /*
+ * we've captured enough state to the stack (including in
+ * particular our EX_CONTEXT state) that we can now release
+ * the interrupt critical section and replace it with our
+ * standard "interrupts disabled" mask value. This allows
+ * synchronous interrupts (and profile interrupts) to punch
+ * through from this point onwards.
+ *
+ * It's important that no code before this point touch memory
+ * other than our own stack (to keep the invariant that this
+ * is all that gets touched under ICS), and that no code after
+ * this point reference any interrupt-specific SPR, in particular
+ * the EX_CONTEXT_K_ values.
+ */
+ .ifc \function,handle_nmi
+ IRQ_DISABLE_ALL(r20)
+ .else
+ IRQ_DISABLE(r20, r21)
+ .endif
+ mtspr INTERRUPT_CRITICAL_SECTION, zero
+
+ /* Load tp with our per-cpu offset. */
+#ifdef CONFIG_SMP
+ {
+ mfspr r20, SPR_SYSTEM_SAVE_K_0
+ moveli r21, hw2_last(__per_cpu_offset)
+ }
+ {
+ shl16insli r21, r21, hw1(__per_cpu_offset)
+ bfextu r20, r20, CPU_SHIFT, 63
+ }
+ shl16insli r21, r21, hw0(__per_cpu_offset)
+ shl3add r20, r20, r21
+ ld tp, r20
+#else
+ move tp, zero
+#endif
+
#ifdef __COLLECT_LINKER_FEEDBACK__
/*
* Notify the feedback routines that we were in the
@@ -512,21 +731,6 @@ intvec_\vecname:
#endif
/*
- * we've captured enough state to the stack (including in
- * particular our EX_CONTEXT state) that we can now release
- * the interrupt critical section and replace it with our
- * standard "interrupts disabled" mask value. This allows
- * synchronous interrupts (and profile interrupts) to punch
- * through from this point onwards.
- */
- .ifc \function,handle_nmi
- IRQ_DISABLE_ALL(r20)
- .else
- IRQ_DISABLE(r20, r21)
- .endif
- mtspr INTERRUPT_CRITICAL_SECTION, zero
-
- /*
* Prepare the first 256 stack bytes to be rapidly accessible
* without having to fetch the background data.
*/
@@ -576,7 +780,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -605,6 +809,10 @@ handle_interrupt:
* This routine takes a boolean in r30 indicating if this is an NMI.
* If so, we also expect a boolean in r31 indicating whether to
* re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
*/
STD_ENTRY(interrupt_return)
/* If we're resuming to kernel space, don't check thread flags. */
@@ -613,14 +821,39 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
}
ld r29, r29
- andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r29, r29)
{
beqzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ move r29, sp
}
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ EXTRACT_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
+ {
+ ld r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
+ }
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ ld4s r29, r29
+ }
+ beqzt r28, 1f
+ bnez r29, 1f
+ /* Disable interrupts explicitly for preemption. */
+ IRQ_DISABLE(r20,r21)
+ TRACE_IRQS_OFF
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
+
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
- moveli r27, hw2_last(_cpu_idle_nap)
+ {
+ moveli r27, hw2_last(_cpu_idle_nap)
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ }
{
ld r28, r29
shl16insli r27, r27, hw1(_cpu_idle_nap)
@@ -642,6 +875,20 @@ STD_ENTRY(interrupt_return)
FEEDBACK_REENTER(interrupt_return)
/*
+ * Use r33 to hold whether we have already loaded the callee-saves
+ * into ptregs. We don't want to do it twice in this loop, since
+ * then we'd clobber whatever changes are made by ptrace, etc.
+ */
+ {
+ movei r33, 0
+ move r32, sp
+ }
+
+ /* Get base of stack in r32. */
+ EXTRACT_THREAD_INFO(r32)
+
+.Lretry_work_pending:
+ /*
* Disable interrupts so as to make sure we don't
* miss an interrupt that sets any of the thread flags (like
* need_resched or sigpending) between sampling and the iret.
@@ -651,9 +898,6 @@ STD_ENTRY(interrupt_return)
IRQ_DISABLE(r20, r21)
TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */
- /* Get base of stack in r32; note r30/31 are used as arguments here. */
- GET_THREAD_INFO(r32)
-
/* Check to see if there is any work to do before returning to user. */
{
@@ -669,16 +913,18 @@ STD_ENTRY(interrupt_return)
/*
* Make sure we have all the registers saved for signal
- * handling or single-step. Call out to C code to figure out
+ * handling or notify-resume. Call out to C code to figure out
* exactly what we need to do for each flag bit, then if
* necessary, reload the flags and recheck.
*/
- push_extra_callee_saves r0
{
PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
- jal do_work_pending
+ bnez r33, 1f
}
- bnez r0, .Lresume_userspace
+ push_extra_callee_saves r0
+ movei r33, 1
+1: jal do_work_pending
+ bnez r0, .Lretry_work_pending
/*
* In the NMI case we
@@ -702,7 +948,7 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
}
{
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ IS_KERNEL_EX1(r0, r0)
ld r32, r32
}
bnez r0, 1f
@@ -718,12 +964,22 @@ STD_ENTRY(interrupt_return)
beqzt r30, .Lrestore_regs
j 3f
2: TRACE_IRQS_ON
+ IRQ_ENABLE_LOAD(r20, r21)
movei r0, 1
mtspr INTERRUPT_CRITICAL_SECTION, r0
- IRQ_ENABLE(r20, r21)
+ IRQ_ENABLE_APPLY(r20, r21)
beqzt r30, .Lrestore_regs
3:
+#if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT
+# error Bad interrupt assumption
+#endif
+ {
+ movei r0, 3 /* two adjacent bits for the PERF_COUNT mask */
+ beqz r31, .Lrestore_regs
+ }
+ shli r0, r0, INT_PERF_COUNT
+ mtspr SPR_INTERRUPT_MASK_RESET_K, r0
/*
* We now commit to returning from this interrupt, since we will be
@@ -737,7 +993,6 @@ STD_ENTRY(interrupt_return)
* that will save some cycles if this turns out to be a syscall.
*/
.Lrestore_regs:
- FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */
/*
* Rotate so we have one high bit and one low bit to test.
@@ -773,7 +1028,7 @@ STD_ENTRY(interrupt_return)
pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC
{
mtspr SPR_EX_CONTEXT_K_1, lr
- andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(lr, lr)
}
{
mtspr SPR_EX_CONTEXT_K_0, r21
@@ -941,7 +1196,7 @@ handle_nmi:
FEEDBACK_REENTER(handle_nmi)
{
movei r30, 1
- move r31, r0
+ cmpeq r31, r0, zero
}
j interrupt_return
STD_ENDPROC(handle_nmi)
@@ -963,19 +1218,30 @@ handle_syscall:
shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET)
add r20, r20, tp
ld4s r21, r20
- addi r21, r21, 1
- st4 r20, r21
+ {
+ addi r21, r21, 1
+ move r31, sp
+ }
+ {
+ st4 r20, r21
+ EXTRACT_THREAD_INFO(r31)
+ }
/* Trace syscalls, if requested. */
- GET_THREAD_INFO(r31)
addi r31, r31, THREAD_INFO_FLAGS_OFFSET
- ld r30, r31
- andi r30, r30, _TIF_SYSCALL_TRACE
+ {
+ ld r30, r31
+ moveli r32, _TIF_SYSCALL_ENTRY_WORK
+ }
+ and r30, r30, r32
{
addi r30, r31, THREAD_INFO_STATUS_OFFSET - THREAD_INFO_FLAGS_OFFSET
beqzt r30, .Lrestore_syscall_regs
}
- jal do_syscall_trace
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_enter
+ }
FEEDBACK_REENTER(handle_syscall)
/*
@@ -1004,7 +1270,9 @@ handle_syscall:
/* Ensure that the syscall number is within the legal range. */
{
moveli r20, hw2(sys_call_table)
+#ifdef CONFIG_COMPAT
blbs r30, .Lcompat_syscall
+#endif
}
{
cmpltu r21, TREG_SYSCALL_NR_NAME, r21
@@ -1038,13 +1306,37 @@ handle_syscall:
FEEDBACK_REENTER(handle_syscall)
/* Do syscall trace again, if requested. */
- ld r30, r31
- andi r30, r30, _TIF_SYSCALL_TRACE
- beqzt r30, 1f
- jal do_syscall_trace
+ {
+ ld r30, r31
+ moveli r32, _TIF_SYSCALL_EXIT_WORK
+ }
+ and r0, r30, r32
+ {
+ andi r0, r30, _TIF_SINGLESTEP
+ beqzt r0, 1f
+ }
+ {
+ PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+ jal do_syscall_trace_exit
+ }
FEEDBACK_REENTER(handle_syscall)
-1: j .Lresume_userspace /* jump into middle of interrupt_return */
+ andi r0, r30, _TIF_SINGLESTEP
+
+1: beqzt r0, 2f
+
+ /* Single stepping -- notify ptrace. */
+ {
+ movei r0, SIGTRAP
+ jal ptrace_notify
+ }
+ FEEDBACK_REENTER(handle_syscall)
+
+2: {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+#ifdef CONFIG_COMPAT
.Lcompat_syscall:
/*
* Load the base of the compat syscall table in r20, and
@@ -1069,6 +1361,7 @@ handle_syscall:
{ move r15, r4; addxi r4, r4, 0 }
{ move r16, r5; addxi r5, r5, 0 }
j .Lload_syscall_pointer
+#endif
.Linvalid_syscall:
/* Report an invalid syscall back to the user program */
@@ -1077,7 +1370,10 @@ handle_syscall:
movei r28, -ENOSYS
}
st r29, r28
- j .Lresume_userspace /* jump into middle of interrupt_return */
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(handle_syscall)
/* Return the address for oprofile to suppress in backtraces. */
@@ -1093,9 +1389,27 @@ STD_ENTRY(ret_from_fork)
jal sim_notify_fork
jal schedule_tail
FEEDBACK_REENTER(ret_from_fork)
- j .Lresume_userspace
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
STD_ENDPROC(ret_from_fork)
+STD_ENTRY(ret_from_kernel_thread)
+ jal sim_notify_fork
+ jal schedule_tail
+ FEEDBACK_REENTER(ret_from_fork)
+ {
+ move r0, r31
+ jalr r30
+ }
+ FEEDBACK_REENTER(ret_from_kernel_thread)
+ {
+ movei r30, 0 /* not an NMI */
+ j .Lresume_userspace /* jump into middle of interrupt_return */
+ }
+ STD_ENDPROC(ret_from_kernel_thread)
+
/* Various stub interrupt handlers and syscall handlers */
STD_ENTRY_LOCAL(_kernel_double_fault)
@@ -1112,15 +1426,6 @@ STD_ENTRY_LOCAL(bad_intr)
panic "Unhandled interrupt %#x: PC %#lx"
STD_ENDPROC(bad_intr)
-/* Put address of pt_regs in reg and jump. */
-#define PTREGS_SYSCALL(x, reg) \
- STD_ENTRY(_##x); \
- { \
- PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
- j x \
- }; \
- STD_ENDPROC(_##x)
-
/*
* Special-case sigreturn to not write r0 to the stack on return.
* This is technically more efficient, but it also avoids difficulties
@@ -1136,37 +1441,74 @@ STD_ENTRY_LOCAL(bad_intr)
}; \
STD_ENDPROC(_##x)
-PTREGS_SYSCALL(sys_execve, r3)
-PTREGS_SYSCALL(sys_sigaltstack, r2)
PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
#ifdef CONFIG_COMPAT
-PTREGS_SYSCALL(compat_sys_execve, r3)
-PTREGS_SYSCALL(compat_sys_sigaltstack, r2)
PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0)
#endif
-/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+/* Save additional callee-saves to pt_regs and jump to standard function. */
STD_ENTRY(_sys_clone)
push_extra_callee_saves r4
j sys_clone
STD_ENDPROC(_sys_clone)
-/* The single-step support may need to read all the registers. */
+ /*
+ * Recover r3, r2, r1 and r0 here saved by unalign fast vector.
+ * The vector area limit is 32 bundles, so we handle the reload here.
+ * r0, r1, r2 are in thread_info from low to high memory in order.
+ * r3 points to location the original r3 was saved.
+ * We put this code in the __HEAD section so it can be reached
+ * via a conditional branch from the fast path.
+ */
+ __HEAD
+hand_unalign_slow:
+ andi sp, sp, ~1
+hand_unalign_slow_badsp:
+ addi r3, r3, -(3 * 8)
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld r2, r3
+hand_unalign_slow_nonuser:
+ mfspr r3, SPR_SYSTEM_SAVE_K_1
+ __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
+
+/* The unaligned data support needs to read all the registers. */
int_unalign:
push_extra_callee_saves r0
- j do_trap
+ j do_unaligned
+ENDPROC(hand_unalign_slow)
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+/* Fill the return address stack with nonzero entries. */
+STD_ENTRY(fill_ra_stack)
+ {
+ move r0, lr
+ jal 1f
+ }
+1: jal 2f
+2: jal 3f
+3: jal 4f
+4: jrp r0
+ STD_ENDPROC(fill_ra_stack)
-#define op_handle_perf_interrupt bad_intr
-#define op_handle_aux_perf_interrupt bad_intr
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+ __int_hand \vecnum, \vecname, \c_routine, \processing
+ .endm
+
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
+ .global intrpt_start
+intrpt_start:
+
+#ifndef CONFIG_USE_PMC
+#define handle_perf_interrupt bad_intr
+#endif
#ifndef CONFIG_HARDWALL
#define do_hardwall_trap bad_intr
#endif
- int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr
+ int_hand INT_MEM_ERROR, MEM_ERROR, do_trap
int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr
#if CONFIG_KERNEL_PL == 2
int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle
@@ -1188,10 +1530,10 @@ int_unalign:
int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
int_hand INT_SWINT_0, SWINT_0, do_trap
int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
- int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
- int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr
+ int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap
int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt
int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr
@@ -1208,9 +1550,9 @@ int_unalign:
#endif
int_hand INT_IPI_0, IPI_0, bad_intr
int_hand INT_PERF_COUNT, PERF_COUNT, \
- op_handle_perf_interrupt, handle_nmi
+ handle_perf_interrupt, handle_nmi
int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
- op_handle_perf_interrupt, handle_nmi
+ handle_perf_interrupt, handle_nmi
int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr
#if CONFIG_KERNEL_PL == 2
dc_dispatch INT_INTCTRL_2, INTCTRL_2
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 02e62806501..637f2ffaa5f 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -21,6 +21,7 @@
#include <hv/drv_pcie_rc_intf.h>
#include <arch/spr_def.h>
#include <asm/traps.h>
+#include <linux/perf_event.h>
/* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */
#define IS_HW_CLEARED 1
@@ -53,12 +54,6 @@ static DEFINE_PER_CPU(unsigned long, irq_disable_mask)
*/
static DEFINE_PER_CPU(int, irq_depth);
-/* State for allocating IRQs on Gx. */
-#if CHIP_HAS_IPI()
-static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE);
-static DEFINE_SPINLOCK(available_irqs_lock);
-#endif
-
#if CHIP_HAS_IPI()
/* Use SPRs to manipulate device interrupts. */
#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask)
@@ -73,7 +68,8 @@ static DEFINE_SPINLOCK(available_irqs_lock);
/*
* The interrupt handling path, implemented in terms of HV interrupt
- * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
+ * emulation on TILEPro, and IPI hardware on TILE-Gx.
+ * Entered with interrupts disabled.
*/
void tile_dev_intr(struct pt_regs *regs, int intnum)
{
@@ -220,7 +216,7 @@ void __init init_IRQ(void)
ipi_init();
}
-void __cpuinit setup_irq_regs(void)
+void setup_irq_regs(void)
{
/* Enable interrupt delivery. */
unmask_irqs(~0UL);
@@ -233,7 +229,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type)
{
/*
* We use handle_level_irq() by default because the pending
- * interrupt vector (whether modeled by the HV on TILE64 and
+ * interrupt vector (whether modeled by the HV on
* TILEPro or implemented in hardware on TILE-Gx) has
* level-style semantics for each bit. An interrupt fires
* whenever a bit is high, not just at edges.
@@ -259,37 +255,27 @@ void ack_bad_irq(unsigned int irq)
}
/*
- * Generic, controller-independent functions:
+ * /proc/interrupts printing:
*/
-
-#if CHIP_HAS_IPI()
-int create_irq(void)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
- unsigned long flags;
- int result;
-
- spin_lock_irqsave(&available_irqs_lock, flags);
- if (available_irqs == 0)
- result = -ENOMEM;
- else {
- result = __ffs(available_irqs);
- available_irqs &= ~(1UL << result);
- dynamic_irq_init(result);
- }
- spin_unlock_irqrestore(&available_irqs_lock, flags);
+#ifdef CONFIG_PERF_EVENTS
+ int i;
- return result;
+ seq_printf(p, "%*s: ", prec, "PMI");
+
+ for_each_online_cpu(i)
+ seq_printf(p, "%10llu ", per_cpu(perf_irqs, i));
+ seq_puts(p, " perf_events\n");
+#endif
+ return 0;
}
-EXPORT_SYMBOL(create_irq);
-void destroy_irq(unsigned int irq)
+#if CHIP_HAS_IPI()
+int arch_setup_hwirq(unsigned int irq, int node)
{
- unsigned long flags;
-
- spin_lock_irqsave(&available_irqs_lock, flags);
- available_irqs |= (1UL << irq);
- dynamic_irq_cleanup(irq);
- spin_unlock_irqrestore(&available_irqs_lock, flags);
+ return irq >= NR_IRQS ? -EINVAL : 0;
}
-EXPORT_SYMBOL(destroy_irq);
+
+void arch_teardown_hwirq(unsigned int irq) { }
#endif
diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c
new file mode 100644
index 00000000000..4cd88381a83
--- /dev/null
+++ b/arch/tile/kernel/kgdb.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx KGDB support.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+
+static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP;
+static unsigned long stepped_addr;
+static tile_bundle_bits stepped_instr;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0])},
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1])},
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2])},
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3])},
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4])},
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5])},
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6])},
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7])},
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8])},
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9])},
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10])},
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11])},
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12])},
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13])},
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14])},
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15])},
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16])},
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17])},
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18])},
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19])},
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20])},
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21])},
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22])},
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23])},
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24])},
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25])},
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26])},
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27])},
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28])},
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29])},
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30])},
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31])},
+ { "r32", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[32])},
+ { "r33", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[33])},
+ { "r34", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[34])},
+ { "r35", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[35])},
+ { "r36", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[36])},
+ { "r37", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[37])},
+ { "r38", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[38])},
+ { "r39", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[39])},
+ { "r40", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[40])},
+ { "r41", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[41])},
+ { "r42", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[42])},
+ { "r43", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[43])},
+ { "r44", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[44])},
+ { "r45", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[45])},
+ { "r46", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[46])},
+ { "r47", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[47])},
+ { "r48", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[48])},
+ { "r49", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[49])},
+ { "r50", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[50])},
+ { "r51", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[51])},
+ { "r52", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[52])},
+ { "tp", GDB_SIZEOF_REG, offsetof(struct pt_regs, tp)},
+ { "sp", GDB_SIZEOF_REG, offsetof(struct pt_regs, sp)},
+ { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, lr)},
+ { "sn", GDB_SIZEOF_REG, -1},
+ { "idn0", GDB_SIZEOF_REG, -1},
+ { "idn1", GDB_SIZEOF_REG, -1},
+ { "udn0", GDB_SIZEOF_REG, -1},
+ { "udn1", GDB_SIZEOF_REG, -1},
+ { "udn2", GDB_SIZEOF_REG, -1},
+ { "udn3", GDB_SIZEOF_REG, -1},
+ { "zero", GDB_SIZEOF_REG, -1},
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, pc)},
+ { "faultnum", GDB_SIZEOF_REG, offsetof(struct pt_regs, faultnum)},
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+ else
+ memset(mem, 0, dbg_reg_def[regno].size);
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+ return 0;
+}
+
+/*
+ * Similar to pt_regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+ int reg;
+ struct pt_regs *thread_regs;
+ unsigned long *ptr = gdb_regs;
+
+ if (task == NULL)
+ return;
+
+ /* Initialize to zero. */
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ thread_regs = task_pt_regs(task);
+ for (reg = 0; reg <= TREG_LAST_GPR; reg++)
+ *(ptr++) = thread_regs->regs[reg];
+
+ gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc;
+ gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->pc = pc;
+}
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ local_irq_enable();
+ smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+ local_irq_disable();
+}
+
+/*
+ * Convert a kernel address to the writable kernel text mapping.
+ */
+static unsigned long writable_address(unsigned long addr)
+{
+ unsigned long ret = 0;
+
+ if (core_kernel_text(addr))
+ ret = addr - MEM_SV_START + PAGE_OFFSET;
+ else if (is_module_text_address(addr))
+ ret = addr;
+ else
+ pr_err("Unknown virtual address 0x%lx\n", addr);
+
+ return ret;
+}
+
+/*
+ * Calculate the new address for after a step.
+ */
+static unsigned long get_step_address(struct pt_regs *regs)
+{
+ int src_reg;
+ int jump_off;
+ int br_off;
+ unsigned long addr;
+ unsigned int opcode;
+ tile_bundle_bits bundle;
+
+ /* Move to the next instruction by default. */
+ addr = regs->pc + TILEGX_BUNDLE_SIZE_IN_BYTES;
+ bundle = *(unsigned long *)instruction_pointer(regs);
+
+ /* 0: X mode, Otherwise: Y mode. */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
+ get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) {
+ opcode = get_UnaryOpcodeExtension_Y1(bundle);
+
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_Y1:
+ case JALRP_UNARY_OPCODE_Y1:
+ case JR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ src_reg = get_SrcA_Y1(bundle);
+ dbg_get_reg(src_reg, &addr, regs);
+ break;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
+ if (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) {
+ opcode = get_UnaryOpcodeExtension_X1(bundle);
+
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_X1:
+ case JALRP_UNARY_OPCODE_X1:
+ case JR_UNARY_OPCODE_X1:
+ case JRP_UNARY_OPCODE_X1:
+ src_reg = get_SrcA_X1(bundle);
+ dbg_get_reg(src_reg, &addr, regs);
+ break;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == JUMP_OPCODE_X1) {
+ opcode = get_JumpOpcodeExtension_X1(bundle);
+
+ switch (opcode) {
+ case JAL_JUMP_OPCODE_X1:
+ case J_JUMP_OPCODE_X1:
+ jump_off = sign_extend(get_JumpOff_X1(bundle), 27);
+ addr = regs->pc +
+ (jump_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES);
+ break;
+ }
+ } else if (get_Opcode_X1(bundle) == BRANCH_OPCODE_X1) {
+ br_off = 0;
+ opcode = get_BrType_X1(bundle);
+
+ switch (opcode) {
+ case BEQZT_BRANCH_OPCODE_X1:
+ case BEQZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) == 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BGEZT_BRANCH_OPCODE_X1:
+ case BGEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) >= 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BGTZT_BRANCH_OPCODE_X1:
+ case BGTZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) > 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLBCT_BRANCH_OPCODE_X1:
+ case BLBC_BRANCH_OPCODE_X1:
+ if (!(get_SrcA_X1(bundle) & 1))
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLBST_BRANCH_OPCODE_X1:
+ case BLBS_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) & 1)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLEZT_BRANCH_OPCODE_X1:
+ case BLEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) <= 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLTZT_BRANCH_OPCODE_X1:
+ case BLTZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) < 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BNEZT_BRANCH_OPCODE_X1:
+ case BNEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) != 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ }
+
+ if (br_off != 0) {
+ br_off = sign_extend(br_off, 17);
+ addr = regs->pc +
+ (br_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES);
+ }
+ }
+
+ return addr;
+}
+
+/*
+ * Replace the next instruction after the current instruction with a
+ * breakpoint instruction.
+ */
+static void do_single_step(struct pt_regs *regs)
+{
+ unsigned long addr_wr;
+
+ /* Determine where the target instruction will send us to. */
+ stepped_addr = get_step_address(regs);
+ probe_kernel_read((char *)&stepped_instr, (char *)stepped_addr,
+ BREAK_INSTR_SIZE);
+
+ addr_wr = writable_address(stepped_addr);
+ probe_kernel_write((char *)addr_wr, (char *)&singlestep_insn,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE);
+}
+
+static void undo_single_step(struct pt_regs *regs)
+{
+ unsigned long addr_wr;
+
+ if (stepped_instr == 0)
+ return;
+
+ addr_wr = writable_address(stepped_addr);
+ probe_kernel_write((char *)addr_wr, (char *)&stepped_instr,
+ BREAK_INSTR_SIZE);
+ stepped_instr = 0;
+ smp_wmb();
+ flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE);
+}
+
+/*
+ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
+ * then try to fall into the debugger.
+ */
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ int ret;
+ unsigned long flags;
+ struct die_args *args = (struct die_args *)ptr;
+ struct pt_regs *regs = args->regs;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Return immediately if the kprobes fault notifier has set
+ * DIE_PAGE_FAULT.
+ */
+ if (cmd == DIE_PAGE_FAULT)
+ return NOTIFY_DONE;
+#endif /* CONFIG_KPROBES */
+
+ switch (cmd) {
+ case DIE_BREAK:
+ case DIE_COMPILED_BPT:
+ break;
+ case DIE_SSTEPBP:
+ local_irq_save(flags);
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+ local_irq_restore(flags);
+ return NOTIFY_STOP;
+ default:
+ /* Userspace events, ignore. */
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+ }
+
+ local_irq_save(flags);
+ ret = kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
+ local_irq_restore(flags);
+ if (ret)
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+};
+
+/*
+ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
+ * @vector: The error vector of the exception that happened.
+ * @signo: The signal number of the exception that happened.
+ * @err_code: The error code of the exception that happened.
+ * @remcom_in_buffer: The buffer of the packet we have read.
+ * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * This function MUST handle the 'c' and 's' command packets,
+ * as well packets to set / remove a hardware breakpoint, if used.
+ * If there are additional packets which the hardware needs to handle,
+ * they are handled here. The code should return -1 if it wants to
+ * process more packets, and a %0 or %1 if it wants to exit from the
+ * kgdb callback.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *regs)
+{
+ char *ptr;
+ unsigned long address;
+
+ /* Undo any stepping we may have done. */
+ undo_single_step(regs);
+
+ switch (remcom_in_buffer[0]) {
+ case 'c':
+ case 's':
+ case 'D':
+ case 'k':
+ /*
+ * Try to read optional parameter, pc unchanged if no parm.
+ * If this was a compiled-in breakpoint, we need to move
+ * to the next instruction or we will just breakpoint
+ * over and over again.
+ */
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &address))
+ regs->pc = address;
+ else if (*(unsigned long *)regs->pc == compiled_bpt)
+ regs->pc += BREAK_INSTR_SIZE;
+
+ if (remcom_in_buffer[0] == 's') {
+ do_single_step(regs);
+ kgdb_single_step = 1;
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ } else
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+ return 0;
+ }
+
+ return -1; /* this means that we do not want to exit from the handler */
+}
+
+struct kgdb_arch arch_kgdb_ops;
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ *
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ tile_bundle_bits bundle = TILEGX_BPT_BUNDLE;
+
+ memcpy(arch_kgdb_ops.gdb_bpt_instr, &bundle, BREAK_INSTR_SIZE);
+ return register_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ *
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned long addr_wr = writable_address(bpt->bpt_addr);
+
+ if (addr_wr == 0)
+ return -1;
+
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+
+ err = probe_kernel_write((char *)addr_wr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range((unsigned long)bpt->bpt_addr,
+ (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE);
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned long addr_wr = writable_address(bpt->bpt_addr);
+
+ if (addr_wr == 0)
+ return -1;
+
+ err = probe_kernel_write((char *)addr_wr, (char *)bpt->saved_instr,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range((unsigned long)bpt->bpt_addr,
+ (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE);
+ return err;
+}
diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c
new file mode 100644
index 00000000000..27cdcacbe81
--- /dev/null
+++ b/arch/tile/kernel/kprobes.c
@@ -0,0 +1,528 @@
+/*
+ * arch/tile/kernel/kprobes.c
+ * Kprobes on TILE-Gx
+ *
+ * Some portions copied from the MIPS version.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright 2006 Sony Corp.
+ * Copyright 2010 Cavium Networks
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#include <arch/opcode.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+tile_bundle_bits breakpoint_insn = TILEGX_BPT_BUNDLE;
+tile_bundle_bits breakpoint2_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP;
+
+/*
+ * Check whether instruction is branch or jump, or if executing it
+ * has different results depending on where it is executed (e.g. lnk).
+ */
+static int __kprobes insn_has_control(kprobe_opcode_t insn)
+{
+ if (get_Mode(insn) != 0) { /* Y-format bundle */
+ if (get_Opcode_Y1(insn) != RRR_1_OPCODE_Y1 ||
+ get_RRROpcodeExtension_Y1(insn) != UNARY_RRR_1_OPCODE_Y1)
+ return 0;
+
+ switch (get_UnaryOpcodeExtension_Y1(insn)) {
+ case JALRP_UNARY_OPCODE_Y1:
+ case JALR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ case JR_UNARY_OPCODE_Y1:
+ case LNK_UNARY_OPCODE_Y1:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ switch (get_Opcode_X1(insn)) {
+ case BRANCH_OPCODE_X1: /* branch instructions */
+ case JUMP_OPCODE_X1: /* jump instructions: j and jal */
+ return 1;
+
+ case RRR_0_OPCODE_X1: /* other jump instructions */
+ if (get_RRROpcodeExtension_X1(insn) != UNARY_RRR_0_OPCODE_X1)
+ return 0;
+ switch (get_UnaryOpcodeExtension_X1(insn)) {
+ case JALRP_UNARY_OPCODE_X1:
+ case JALR_UNARY_OPCODE_X1:
+ case JRP_UNARY_OPCODE_X1:
+ case JR_UNARY_OPCODE_X1:
+ case LNK_UNARY_OPCODE_X1:
+ return 1;
+ default:
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr;
+
+ if (addr & (sizeof(kprobe_opcode_t) - 1))
+ return -EINVAL;
+
+ if (insn_has_control(*p->addr)) {
+ pr_notice("Kprobes for control instructions are not "
+ "supported\n");
+ return -EINVAL;
+ }
+
+ /* insn: must be on special executable page on tile. */
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+
+ /*
+ * In the kprobe->ainsn.insn[] array we store the original
+ * instruction at index zero and a break trap instruction at
+ * index one.
+ */
+ memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t));
+ p->ainsn.insn[1] = breakpoint2_insn;
+ p->opcode = *p->addr;
+
+ return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ unsigned long addr_wr;
+
+ /* Operate on writable kernel text mapping. */
+ addr_wr = (unsigned long)p->addr - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)addr_wr, &breakpoint_insn,
+ sizeof(breakpoint_insn)))
+ pr_err("%s: failed to enable kprobe\n", __func__);
+
+ smp_wmb();
+ flush_insn_slot(p);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *kp)
+{
+ unsigned long addr_wr;
+
+ /* Operate on writable kernel text mapping. */
+ addr_wr = (unsigned long)kp->addr - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)addr_wr, &kp->opcode,
+ sizeof(kp->opcode)))
+ pr_err("%s: failed to enable kprobe\n", __func__);
+
+ smp_wmb();
+ flush_insn_slot(kp);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_pc = kcb->kprobe_saved_pc;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_pc = kcb->prev_kprobe.saved_pc;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_saved_pc = regs->pc;
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ /* Single step inline if the instruction is a break. */
+ if (p->opcode == breakpoint_insn ||
+ p->opcode == breakpoint2_insn)
+ regs->pc = (unsigned long)p->addr;
+ else
+ regs->pc = (unsigned long)&p->ainsn.insn[0];
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ kprobe_opcode_t *addr;
+ struct kprobe_ctlblk *kcb;
+
+ addr = (kprobe_opcode_t *)regs->pc;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing.
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing. */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ p->ainsn.insn[0] == breakpoint_insn) {
+ goto no_kprobe;
+ }
+ /*
+ * We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ if (*addr != breakpoint_insn) {
+ /*
+ * The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ goto no_kprobe;
+ }
+ p = __this_cpu_read(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs))
+ goto ss_probe;
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != breakpoint_insn) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it. */
+ goto no_kprobe;
+ }
+
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ /* Handler has already set things up, so skip ss setup. */
+ return 1;
+ }
+
+ss_probe:
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction that has been replaced by the breakpoint. To avoid the
+ * SMP problems that can occur when we temporarily put back the
+ * original opcode to single-step, we single-stepped a copy of the
+ * instruction. The address of this copy is p->ainsn.insn.
+ *
+ * This function prepares to return from the post-single-step
+ * breakpoint trap.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ unsigned long orig_pc = kcb->kprobe_saved_pc;
+ regs->pc = orig_pc + 8;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs, kcb);
+
+ /* Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ resume_execution(cur, regs, kcb);
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_BREAK:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEPBP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id(). */
+ preempt_disable();
+
+ if (kprobe_running()
+ && kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ kcb->jprobe_saved_regs = *regs;
+ kcb->jprobe_saved_sp = regs->sp;
+
+ memcpy(kcb->jprobes_stack, (void *)kcb->jprobe_saved_sp,
+ MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp));
+
+ regs->pc = (unsigned long)(jp->entry);
+
+ return 1;
+}
+
+/* Defined in the inline asm below. */
+void jprobe_return_end(void);
+
+void __kprobes jprobe_return(void)
+{
+ asm volatile(
+ "bpt\n\t"
+ ".globl jprobe_return_end\n"
+ "jprobe_return_end:\n");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (regs->pc >= (unsigned long)jprobe_return &&
+ regs->pc <= (unsigned long)jprobe_return_end) {
+ *regs = kcb->jprobe_saved_regs;
+ memcpy((void *)kcb->jprobe_saved_sp, kcb->jprobes_stack,
+ MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp));
+ preempt_enable_no_resched();
+
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe causes the
+ * handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+ asm volatile(
+ "nop\n\t"
+ ".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n\t"
+ "nop\n\t"
+ : : : "memory");
+}
+
+void kretprobe_trampoline(void);
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *) regs->lr;
+
+ /* Replace the return addr with trampoline addr */
+ regs->lr = (unsigned long)kretprobe_trampoline;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit.
+ */
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address = (unsigned long)kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * a return probe installed on them, and/or more than one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address) {
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ instruction_pointer(regs) = orig_ret_address;
+
+ reset_current_kprobe();
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline)
+ return 1;
+
+ return 0;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *)kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ register_kprobe(&trampoline_p);
+ return 0;
+}
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index 6255f2eab11..f0b54a93471 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -31,6 +31,8 @@
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/checksum.h>
+#include <asm/tlbflush.h>
+#include <asm/homecache.h>
#include <hv/hypervisor.h>
@@ -222,11 +224,22 @@ struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order)
return alloc_pages_node(0, gfp_mask, order);
}
+/*
+ * Address range in which pa=va mapping is set in setup_quasi_va_is_pa().
+ * For tilepro, PAGE_OFFSET is used since this is the largest possbile value
+ * for tilepro, while for tilegx, we limit it to entire middle level page
+ * table which we assume has been allocated and is undoubtedly large enough.
+ */
+#ifndef __tilegx__
+#define QUASI_VA_IS_PA_ADDR_RANGE PAGE_OFFSET
+#else
+#define QUASI_VA_IS_PA_ADDR_RANGE PGDIR_SIZE
+#endif
+
static void setup_quasi_va_is_pa(void)
{
- HV_PTE *pgtable;
HV_PTE pte;
- int i;
+ unsigned long i;
/*
* Flush our TLB to prevent conflicts between the previous contents
@@ -234,16 +247,22 @@ static void setup_quasi_va_is_pa(void)
*/
local_flush_tlb_all();
- /* setup VA is PA, at least up to PAGE_OFFSET */
-
- pgtable = (HV_PTE *)current->mm->pgd;
+ /*
+ * setup VA is PA, at least up to QUASI_VA_IS_PA_ADDR_RANGE.
+ * Note here we assume that level-1 page table is defined by
+ * HPAGE_SIZE.
+ */
pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE);
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
-
- for (i = 0; i < pgd_index(PAGE_OFFSET); i++) {
+ for (i = 0; i < (QUASI_VA_IS_PA_ADDR_RANGE >> HPAGE_SHIFT); i++) {
+ unsigned long vaddr = i << HPAGE_SHIFT;
+ pgd_t *pgd = pgd_offset(current->mm, vaddr);
+ pud_t *pud = pud_offset(pgd, vaddr);
+ pte_t *ptep = (pte_t *) pmd_offset(pud, vaddr);
unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT);
+
if (pfn_valid(pfn))
- __set_pte(&pgtable[i], pfn_pte(pfn, pte));
+ __set_pte(ptep, pfn_pte(pfn, pte));
}
}
@@ -251,6 +270,7 @@ static void setup_quasi_va_is_pa(void)
void machine_kexec(struct kimage *image)
{
void *reboot_code_buffer;
+ pte_t *ptep;
void (*rnk)(unsigned long, void *, unsigned long)
__noreturn;
@@ -266,8 +286,10 @@ void machine_kexec(struct kimage *image)
*/
homecache_change_page_home(image->control_code_page, 0,
smp_processor_id());
- reboot_code_buffer = vmap(&image->control_code_page, 1, 0,
- __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE));
+ reboot_code_buffer = page_address(image->control_code_page);
+ BUG_ON(reboot_code_buffer == NULL);
+ ptep = virt_to_pte(NULL, (unsigned long)reboot_code_buffer);
+ __set_pte(ptep, pte_mkexec(*ptep));
memcpy(reboot_code_buffer, relocate_new_kernel,
relocate_new_kernel_size);
__flush_icache_range(
diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S
new file mode 100644
index 00000000000..70d7bb0c4d8
--- /dev/null
+++ b/arch/tile/kernel/mcount_64.S
@@ -0,0 +1,224 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx specific __mcount support
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+
+#define REGSIZE 8
+
+ .text
+ .global __mcount
+
+ .macro MCOUNT_SAVE_REGS
+ addli sp, sp, -REGSIZE
+ {
+ st sp, lr
+ addli r29, sp, - (12 * REGSIZE)
+ }
+ {
+ addli sp, sp, - (13 * REGSIZE)
+ st r29, sp
+ }
+ addli r29, r29, REGSIZE
+ { st r29, r0; addli r29, r29, REGSIZE }
+ { st r29, r1; addli r29, r29, REGSIZE }
+ { st r29, r2; addli r29, r29, REGSIZE }
+ { st r29, r3; addli r29, r29, REGSIZE }
+ { st r29, r4; addli r29, r29, REGSIZE }
+ { st r29, r5; addli r29, r29, REGSIZE }
+ { st r29, r6; addli r29, r29, REGSIZE }
+ { st r29, r7; addli r29, r29, REGSIZE }
+ { st r29, r8; addli r29, r29, REGSIZE }
+ { st r29, r9; addli r29, r29, REGSIZE }
+ { st r29, r10; addli r29, r29, REGSIZE }
+ .endm
+
+ .macro MCOUNT_RESTORE_REGS
+ addli r29, sp, (2 * REGSIZE)
+ { ld r0, r29; addli r29, r29, REGSIZE }
+ { ld r1, r29; addli r29, r29, REGSIZE }
+ { ld r2, r29; addli r29, r29, REGSIZE }
+ { ld r3, r29; addli r29, r29, REGSIZE }
+ { ld r4, r29; addli r29, r29, REGSIZE }
+ { ld r5, r29; addli r29, r29, REGSIZE }
+ { ld r6, r29; addli r29, r29, REGSIZE }
+ { ld r7, r29; addli r29, r29, REGSIZE }
+ { ld r8, r29; addli r29, r29, REGSIZE }
+ { ld r9, r29; addli r29, r29, REGSIZE }
+ { ld r10, r29; addli lr, sp, (13 * REGSIZE) }
+ { ld lr, lr; addli sp, sp, (14 * REGSIZE) }
+ .endm
+
+ .macro RETURN_BACK
+ { move r12, lr; move lr, r10 }
+ jrp r12
+ .endm
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ .align 64
+STD_ENTRY(__mcount)
+__mcount:
+ j ftrace_stub
+STD_ENDPROC(__mcount)
+
+ .align 64
+STD_ENTRY(ftrace_caller)
+ moveli r11, hw2_last(function_trace_stop)
+ { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr }
+ { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 }
+ ld r11, r11
+ beqz r11, 1f
+ jrp r12
+
+1:
+ { move r10, lr; move lr, r12 }
+ MCOUNT_SAVE_REGS
+
+ /* arg1: self return address */
+ /* arg2: parent's return address */
+ { move r0, lr; move r1, r10 }
+
+ .global ftrace_call
+ftrace_call:
+ /*
+ * a placeholder for the call to a real tracing function, i.e.
+ * ftrace_trace_function()
+ */
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call:
+ /*
+ * a placeholder for the call to a real tracing function, i.e.
+ * ftrace_graph_caller()
+ */
+ nop
+#endif
+ MCOUNT_RESTORE_REGS
+ .global ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+STD_ENDPROC(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ .align 64
+STD_ENTRY(__mcount)
+ moveli r11, hw2_last(function_trace_stop)
+ { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr }
+ { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 }
+ ld r11, r11
+ beqz r11, 1f
+ jrp r12
+
+1:
+ { move r10, lr; move lr, r12 }
+ {
+ moveli r11, hw2_last(ftrace_trace_function)
+ moveli r13, hw2_last(ftrace_stub)
+ }
+ {
+ shl16insli r11, r11, hw1(ftrace_trace_function)
+ shl16insli r13, r13, hw1(ftrace_stub)
+ }
+ {
+ shl16insli r11, r11, hw0(ftrace_trace_function)
+ shl16insli r13, r13, hw0(ftrace_stub)
+ }
+
+ ld r11, r11
+ sub r14, r13, r11
+ bnez r14, static_trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ moveli r15, hw2_last(ftrace_graph_return)
+ shl16insli r15, r15, hw1(ftrace_graph_return)
+ shl16insli r15, r15, hw0(ftrace_graph_return)
+ ld r15, r15
+ sub r15, r15, r13
+ bnez r15, ftrace_graph_caller
+
+ {
+ moveli r16, hw2_last(ftrace_graph_entry)
+ moveli r17, hw2_last(ftrace_graph_entry_stub)
+ }
+ {
+ shl16insli r16, r16, hw1(ftrace_graph_entry)
+ shl16insli r17, r17, hw1(ftrace_graph_entry_stub)
+ }
+ {
+ shl16insli r16, r16, hw0(ftrace_graph_entry)
+ shl16insli r17, r17, hw0(ftrace_graph_entry_stub)
+ }
+ ld r16, r16
+ sub r17, r16, r17
+ bnez r17, ftrace_graph_caller
+
+#endif
+ RETURN_BACK
+
+static_trace:
+ MCOUNT_SAVE_REGS
+
+ /* arg1: self return address */
+ /* arg2: parent's return address */
+ { move r0, lr; move r1, r10 }
+
+ /* call ftrace_trace_function() */
+ jalr r11
+
+ MCOUNT_RESTORE_REGS
+
+ .global ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+STD_ENDPROC(__mcount)
+
+#endif /* ! CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+STD_ENTRY(ftrace_graph_caller)
+ftrace_graph_caller:
+#ifndef CONFIG_DYNAMIC_FTRACE
+ MCOUNT_SAVE_REGS
+#endif
+
+ /* arg1: Get the location of the parent's return address */
+ addi r0, sp, 12 * REGSIZE
+ /* arg2: Get self return address */
+ move r1, lr
+
+ jal prepare_ftrace_return
+
+ MCOUNT_RESTORE_REGS
+ RETURN_BACK
+STD_ENDPROC(ftrace_graph_caller)
+
+ .global return_to_handler
+return_to_handler:
+ MCOUNT_SAVE_REGS
+
+ jal ftrace_return_to_handler
+ /* restore the real parent address */
+ move r11, r0
+
+ MCOUNT_RESTORE_REGS
+ jr r11
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c
index 0858ee6b520..7867266f971 100644
--- a/arch/tile/kernel/messaging.c
+++ b/arch/tile/kernel/messaging.c
@@ -25,7 +25,7 @@
/* All messages are stored here */
static DEFINE_PER_CPU(HV_MsgState, msg_state);
-void __cpuinit init_messaging(void)
+void init_messaging(void)
{
/* Allocate storage for messages in kernel space */
HV_MsgState *state = &__get_cpu_var(msg_state);
@@ -68,8 +68,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum)
#endif
while (1) {
- rmi = hv_receive_message(__get_cpu_var(msg_state),
- (HV_VirtAddr) message,
+ HV_MsgState *state = this_cpu_ptr(&msg_state);
+ rmi = hv_receive_message(*state, (HV_VirtAddr) message,
sizeof(message));
if (rmi.msglen == 0)
break;
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index b90ab992567..4918d91bc3a 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -24,16 +24,6 @@
#include <asm/homecache.h>
#include <arch/opcode.h>
-#ifdef __tilegx__
-# define Elf_Rela Elf64_Rela
-# define ELF_R_SYM ELF64_R_SYM
-# define ELF_R_TYPE ELF64_R_TYPE
-#else
-# define Elf_Rela Elf32_Rela
-# define ELF_R_SYM ELF32_R_SYM
-# define ELF_R_TYPE ELF32_R_TYPE
-#endif
-
#ifdef MODULE_DEBUG
#define DEBUGP printk
#else
@@ -52,8 +42,6 @@ void *module_alloc(unsigned long size)
int i = 0;
int npages;
- if (size == 0)
- return NULL;
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
if (pages == NULL)
@@ -67,6 +55,8 @@ void *module_alloc(unsigned long size)
area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END);
if (!area)
goto error;
+ area->nr_pages = npages;
+ area->pages = pages;
if (map_vm_area(area, prot_rwx, &pages)) {
vunmap(area->addr);
@@ -157,7 +147,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
switch (ELF_R_TYPE(rel[i].r_info)) {
-#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value)))
+#ifdef __LITTLE_ENDIAN
+# define MUNGE(func) \
+ (*location = ((*location & ~func(-1)) | func(value)))
+#else
+/*
+ * Instructions are always little-endian, so when we read them as data,
+ * we have to swap them around before and after modifying them.
+ */
+# define MUNGE(func) \
+ (*location = swab64((swab64(*location) & ~func(-1)) | func(value)))
+#endif
#ifndef __tilegx__
case R_TILE_32:
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b3ed19f8779..09b58703ac2 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
@@ -22,16 +23,22 @@
/* Generic DMA mapping functions: */
/*
- * Allocate what Linux calls "coherent" memory, which for us just
- * means uncached.
+ * Allocate what Linux calls "coherent" memory. On TILEPro this is
+ * uncached memory; on TILE-Gx it is hash-for-home memory.
*/
-void *dma_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t gfp)
+#ifdef __tilepro__
+#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
+#else
+#define PAGE_HOME_DMA PAGE_HOME_HASH
+#endif
+
+static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
{
- u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
- int node = dev_to_node(dev);
+ u64 dma_mask = (dev && dev->coherent_dma_mask) ?
+ dev->coherent_dma_mask : DMA_BIT_MASK(32);
+ int node = dev ? dev_to_node(dev) : 0;
int order = get_order(size);
struct page *pg;
dma_addr_t addr;
@@ -39,39 +46,42 @@ void *dma_alloc_coherent(struct device *dev,
gfp |= __GFP_ZERO;
/*
- * By forcing NUMA node 0 for 32-bit masks we ensure that the
- * high 32 bits of the resulting PA will be zero. If the mask
- * size is, e.g., 24, we may still not be able to guarantee a
- * suitable memory address, in which case we will return NULL.
- * But such devices are uncommon.
+ * If the mask specifies that the memory be in the first 4 GB, then
+ * we force the allocation to come from the DMA zone. We also
+ * force the node to 0 since that's the only node where the DMA
+ * zone isn't empty. If the mask size is smaller than 32 bits, we
+ * may still not be able to guarantee a suitable memory address, in
+ * which case we will return NULL. But such devices are uncommon.
*/
- if (dma_mask <= DMA_BIT_MASK(32))
+ if (dma_mask <= DMA_BIT_MASK(32)) {
+ gfp |= GFP_DMA;
node = 0;
+ }
- pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED);
+ pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
if (pg == NULL)
return NULL;
addr = page_to_phys(pg);
if (addr + size > dma_mask) {
- homecache_free_pages(addr, order);
+ __homecache_free_pages(pg, order);
return NULL;
}
*dma_handle = addr;
+
return page_address(pg);
}
-EXPORT_SYMBOL(dma_alloc_coherent);
/*
- * Free memory that was allocated with dma_alloc_coherent.
+ * Free memory that was allocated with tile_dma_alloc_coherent.
*/
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
+static void tile_dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
homecache_free_pages((unsigned long)vaddr, get_order(size));
}
-EXPORT_SYMBOL(dma_free_coherent);
/*
* The map routines "map" the specified address range for DMA
@@ -87,52 +97,285 @@ EXPORT_SYMBOL(dma_free_coherent);
* can count on nothing having been touched.
*/
-/* Flush a PA range from cache page by page. */
-static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
+/* Set up a single page for DMA access. */
+static void __dma_prep_page(struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+ /*
+ * Flush the page from cache if necessary.
+ * On tilegx, data is delivered to hash-for-home L3; on tilepro,
+ * data is delivered direct to memory.
+ *
+ * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
+ * this to be a "flush" not a "finv" and keep some of the
+ * state in cache across the DMA operation, but it doesn't seem
+ * worth creating the necessary flush_buffer_xxx() infrastructure.
+ */
+ int home = page_home(page);
+ switch (home) {
+ case PAGE_HOME_HASH:
+#ifdef __tilegx__
+ return;
+#endif
+ break;
+ case PAGE_HOME_UNCACHED:
+#ifdef __tilepro__
+ return;
+#endif
+ break;
+ case PAGE_HOME_IMMUTABLE:
+ /* Should be going to the device only. */
+ BUG_ON(direction == DMA_FROM_DEVICE ||
+ direction == DMA_BIDIRECTIONAL);
+ return;
+ case PAGE_HOME_INCOHERENT:
+ /* Incoherent anyway, so no need to work hard here. */
+ return;
+ default:
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ break;
+ }
+ homecache_finv_page(page);
+
+#ifdef DEBUG_ALIGNMENT
+ /* Warn if the region isn't cacheline aligned. */
+ if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
+ pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
+ PFN_PHYS(page_to_pfn(page)) + offset, size);
+#endif
+}
+
+/* Make the page ready to be read by the core. */
+static void __dma_complete_page(struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction direction)
+{
+#ifdef __tilegx__
+ switch (page_home(page)) {
+ case PAGE_HOME_HASH:
+ /* I/O device delivered data the way the cpu wanted it. */
+ break;
+ case PAGE_HOME_INCOHERENT:
+ /* Incoherent anyway, so no need to work hard here. */
+ break;
+ case PAGE_HOME_IMMUTABLE:
+ /* Extra read-only copies are not a problem. */
+ break;
+ default:
+ /* Flush the bogus hash-for-home I/O entries to memory. */
+ homecache_finv_map_page(page, PAGE_HOME_HASH);
+ break;
+ }
+#endif
+}
+
+static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
{
struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
- size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
+ unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+ size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+ while (size != 0) {
+ __dma_prep_page(page, offset, bytes, direction);
+ size -= bytes;
+ ++page;
+ offset = 0;
+ bytes = min((size_t)PAGE_SIZE, size);
+ }
+}
- while ((ssize_t)size > 0) {
- /* Flush the page. */
- homecache_flush_cache(page++, 0);
+static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
+ unsigned long offset = dma_addr & (PAGE_SIZE - 1);
+ size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));
+
+ while (size != 0) {
+ __dma_complete_page(page, offset, bytes, direction);
+ size -= bytes;
+ ++page;
+ offset = 0;
+ bytes = min((size_t)PAGE_SIZE, size);
+ }
+}
+
+static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ WARN_ON(nents == 0 || sglist->length == 0);
- /* Figure out if we need to continue on the next page. */
- size -= bytesleft;
- bytesleft = PAGE_SIZE;
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_prep_pa_range(sg->dma_address, sg->length, direction);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
+
+ return nents;
}
-/*
- * dma_map_single can be passed any memory address, and there appear
- * to be no alignment constraints.
- *
- * There is a chance that the start of the buffer will share a cache
- * line with some other data that has been touched in the meantime.
- */
-dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction direction)
+static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- dma_addr_t dma_addr = __pa(ptr);
+ struct scatterlist *sg;
+ int i;
BUG_ON(!valid_dma_direction(direction));
- WARN_ON(size == 0);
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_complete_pa_range(sg->dma_address, sg->length,
+ direction);
+ }
+}
+
+static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ BUG_ON(!valid_dma_direction(direction));
+
+ BUG_ON(offset + size > PAGE_SIZE);
+ __dma_prep_page(page, offset, size, direction);
+
+ return page_to_pa(page) + offset;
+}
+
+static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ BUG_ON(!valid_dma_direction(direction));
+
+ __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+ dma_address & (PAGE_SIZE - 1), size, direction);
+}
+
+static void tile_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
- __dma_map_pa_range(dma_addr, size);
+ __dma_complete_pa_range(dma_handle, size, direction);
+}
- return dma_addr;
+static void tile_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ __dma_prep_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_map_single);
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_cpu(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
}
-EXPORT_SYMBOL(dma_unmap_single);
-int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
- enum dma_data_direction direction)
+static void tile_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_device(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
+}
+
+static inline int
+tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+static inline int
+tile_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+static struct dma_map_ops tile_default_dma_map_ops = {
+ .alloc = tile_dma_alloc_coherent,
+ .free = tile_dma_free_coherent,
+ .map_page = tile_dma_map_page,
+ .unmap_page = tile_dma_unmap_page,
+ .map_sg = tile_dma_map_sg,
+ .unmap_sg = tile_dma_unmap_sg,
+ .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_dma_sync_sg_for_device,
+ .mapping_error = tile_dma_mapping_error,
+ .dma_supported = tile_dma_supported
+};
+
+struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
+EXPORT_SYMBOL(tile_dma_map_ops);
+
+/* Generic PCI DMA mapping functions */
+
+static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
+{
+ int node = dev_to_node(dev);
+ int order = get_order(size);
+ struct page *pg;
+ dma_addr_t addr;
+
+ gfp |= __GFP_ZERO;
+
+ pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
+ if (pg == NULL)
+ return NULL;
+
+ addr = page_to_phys(pg);
+
+ *dma_handle = addr + get_dma_offset(dev);
+
+ return page_address(pg);
+}
+
+/*
+ * Free memory that was allocated with tile_pci_dma_alloc_coherent.
+ */
+static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ homecache_free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -143,73 +386,103 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
for_each_sg(sglist, sg, nents, i) {
sg->dma_address = sg_phys(sg);
- __dma_map_pa_range(sg->dma_address, sg->length);
+ __dma_prep_pa_range(sg->dma_address, sg->length, direction);
+
+ sg->dma_address = sg->dma_address + get_dma_offset(dev);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return nents;
}
-EXPORT_SYMBOL(dma_map_sg);
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
- enum dma_data_direction direction)
+static void tile_pci_dma_unmap_sg(struct device *dev,
+ struct scatterlist *sglist, int nents,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
+ for_each_sg(sglist, sg, nents, i) {
+ sg->dma_address = sg_phys(sg);
+ __dma_complete_pa_range(sg->dma_address, sg->length,
+ direction);
+ }
}
-EXPORT_SYMBOL(dma_unmap_sg);
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
BUG_ON(!valid_dma_direction(direction));
BUG_ON(offset + size > PAGE_SIZE);
- homecache_flush_cache(page, 0);
+ __dma_prep_page(page, offset, size, direction);
- return page_to_pa(page) + offset;
+ return page_to_pa(page) + offset + get_dma_offset(dev);
}
-EXPORT_SYMBOL(dma_map_page);
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
+static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
BUG_ON(!valid_dma_direction(direction));
+
+ dma_address -= get_dma_offset(dev);
+
+ __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
+ dma_address & (PAGE_SIZE - 1), size, direction);
}
-EXPORT_SYMBOL(dma_unmap_page);
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
+
+ dma_handle -= get_dma_offset(dev);
+
+ __dma_complete_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
-void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+static void tile_pci_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size,
+ enum dma_data_direction
+ direction)
{
- unsigned long start = PFN_DOWN(dma_handle);
- unsigned long end = PFN_DOWN(dma_handle + size - 1);
- unsigned long i;
+ dma_handle -= get_dma_offset(dev);
- BUG_ON(!valid_dma_direction(direction));
- for (i = start; i <= end; ++i)
- homecache_flush_cache(pfn_to_page(i), 0);
+ __dma_prep_pa_range(dma_handle, size, direction);
}
-EXPORT_SYMBOL(dma_sync_single_for_device);
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
- enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems,
+ enum dma_data_direction direction)
{
+ struct scatterlist *sg;
+ int i;
+
BUG_ON(!valid_dma_direction(direction));
- WARN_ON(nelems == 0 || sg[0].length == 0);
+ WARN_ON(nelems == 0 || sglist->length == 0);
+
+ for_each_sg(sglist, sg, nelems, i) {
+ dma_sync_single_for_cpu(dev, sg->dma_address,
+ sg_dma_len(sg), direction);
+ }
}
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
-/*
- * Flush and invalidate cache for scatterlist.
- */
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+static void tile_pci_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems,
+ enum dma_data_direction direction)
{
struct scatterlist *sg;
int i;
@@ -222,31 +495,136 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
sg_dma_len(sg), direction);
}
}
-EXPORT_SYMBOL(dma_sync_sg_for_device);
-void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static inline int
+tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+static inline int
+tile_pci_dma_supported(struct device *dev, u64 mask)
+{
+ return 1;
+}
+
+static struct dma_map_ops tile_pci_default_dma_map_ops = {
+ .alloc = tile_pci_dma_alloc_coherent,
+ .free = tile_pci_dma_free_coherent,
+ .map_page = tile_pci_dma_map_page,
+ .unmap_page = tile_pci_dma_unmap_page,
+ .map_sg = tile_pci_dma_map_sg,
+ .unmap_sg = tile_pci_dma_unmap_sg,
+ .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_pci_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
+ .mapping_error = tile_pci_dma_mapping_error,
+ .dma_supported = tile_pci_dma_supported
+};
+
+struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
+EXPORT_SYMBOL(gx_pci_dma_map_ops);
+
+/* PCI DMA mapping functions for legacy PCI devices */
+
+#ifdef CONFIG_SWIOTLB
+static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
{
- dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
+ gfp |= GFP_DMA;
+ return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}
-EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
-void dma_sync_single_range_for_device(struct device *dev,
- dma_addr_t dma_handle,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
+static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
{
- dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
+static struct dma_map_ops pci_swiotlb_dma_ops = {
+ .alloc = tile_swiotlb_alloc_coherent,
+ .free = tile_swiotlb_free_coherent,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .dma_supported = swiotlb_dma_supported,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+
+static struct dma_map_ops pci_hybrid_dma_ops = {
+ .alloc = tile_swiotlb_alloc_coherent,
+ .free = tile_swiotlb_free_coherent,
+ .map_page = tile_pci_dma_map_page,
+ .unmap_page = tile_pci_dma_unmap_page,
+ .map_sg = tile_pci_dma_map_sg,
+ .unmap_sg = tile_pci_dma_unmap_sg,
+ .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_pci_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
+ .mapping_error = tile_pci_dma_mapping_error,
+ .dma_supported = tile_pci_dma_supported
+};
+
+struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
+struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
+#else
+struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
+#endif
+EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
+EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
+
+#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /*
+ * For PCI devices with 64-bit DMA addressing capability, promote
+ * the dma_ops to full capability for both streams and consistent
+ * memory access. For 32-bit capable devices, limit the consistent
+ * memory DMA range to max_direct_dma_addr.
+ */
+ if (dma_ops == gx_pci_dma_map_ops ||
+ dma_ops == gx_hybrid_pci_dma_map_ops ||
+ dma_ops == gx_legacy_pci_dma_map_ops) {
+ if (mask == DMA_BIT_MASK(64))
+ set_dma_ops(dev, gx_pci_dma_map_ops);
+ else if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
+ if (!dma_supported(dev, mask))
+ return -EIO;
+ dev->coherent_dma_mask = mask;
+ return 0;
}
-EXPORT_SYMBOL(dma_sync_single_range_for_device);
+EXPORT_SYMBOL(dma_set_coherent_mask);
+#endif
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
/*
- * dma_alloc_noncoherent() returns non-cacheable memory, so there's no
- * need to do any flushing here.
+ * The generic dma_get_required_mask() uses the highest physical address
+ * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or
+ * 64-bit DMA configuration. Since TILEGx has I/O TLB/MMU, allowing the
+ * DMAs to use the full 64-bit PCI address space and not limited by
+ * the physical memory space, we always let the PCI devices use
+ * 64-bit DMA if they have that capability, by returning the 64-bit
+ * DMA mask here. The device driver has the option to use 32-bit DMA if
+ * the device is not capable of 64-bit DMA.
*/
-void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
+u64 dma_get_required_mask(struct device *dev)
{
+ return DMA_BIT_MASK(64);
}
-EXPORT_SYMBOL(dma_cache_sync);
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+#endif
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index a1bb59eecc1..1f80a88c75a 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -20,7 +20,6 @@
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/uaccess.h>
@@ -52,6 +51,8 @@
*
*/
+static int pci_probe = 1;
+
/*
* This flag tells if the platform is TILEmpower that needs
* special configuration for the PLX switch chip.
@@ -81,7 +82,7 @@ EXPORT_SYMBOL(pcibios_align_resource);
* controller_id is the controller number, config type is 0 or 1 for
* config0 or config1 operations.
*/
-static int __devinit tile_pcie_open(int controller_id, int config_type)
+static int tile_pcie_open(int controller_id, int config_type)
{
char filename[32];
int fd;
@@ -97,8 +98,7 @@ static int __devinit tile_pcie_open(int controller_id, int config_type)
/*
* Get the IRQ numbers from the HV and set up the handlers for them.
*/
-static int __devinit tile_init_irqs(int controller_id,
- struct pci_controller *controller)
+static int tile_init_irqs(int controller_id, struct pci_controller *controller)
{
char filename[32];
int fd;
@@ -141,10 +141,15 @@ static int __devinit tile_init_irqs(int controller_id,
*
* Returns the number of controllers discovered.
*/
-int __devinit tile_pci_init(void)
+int __init tile_pci_init(void)
{
int i;
+ if (!pci_probe) {
+ pr_info("PCI: disabled by boot argument\n");
+ return 0;
+ }
+
pr_info("PCI: Searching for controllers...\n");
/* Re-init number of PCIe controllers to support hot-plug feature. */
@@ -193,7 +198,6 @@ int __devinit tile_pci_init(void)
controller->hv_cfg_fd[0] = hv_cfg_fd0;
controller->hv_cfg_fd[1] = hv_cfg_fd1;
controller->hv_mem_fd = hv_mem_fd;
- controller->first_busno = 0;
controller->last_busno = 0xff;
controller->ops = &tile_cfg_ops;
@@ -237,7 +241,7 @@ static int tile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
}
-static void __devinit fixup_read_and_payload_sizes(void)
+static void fixup_read_and_payload_sizes(void)
{
struct pci_dev *dev = NULL;
int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
@@ -245,39 +249,20 @@ static void __devinit fixup_read_and_payload_sizes(void)
u16 new_values;
/* Scan for the smallest maximum payload size. */
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- int pcie_caps_offset;
- u32 devcap;
- int max_payload;
-
- pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
- if (pcie_caps_offset == 0)
+ for_each_pci_dev(dev) {
+ if (!pci_is_pcie(dev))
continue;
- pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP,
- &devcap);
- max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
- if (max_payload < smallest_max_payload)
- smallest_max_payload = max_payload;
+ if (dev->pcie_mpss < smallest_max_payload)
+ smallest_max_payload = dev->pcie_mpss;
}
/* Now, set the max_payload_size for all devices to that value. */
new_values = (max_read_size << 12) | (smallest_max_payload << 5);
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
- int pcie_caps_offset;
- u16 devctl;
-
- pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP);
- if (pcie_caps_offset == 0)
- continue;
-
- pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
- &devctl);
- devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
- devctl |= new_values;
- pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL,
- devctl);
- }
+ for_each_pci_dev(dev)
+ pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ,
+ new_values);
}
@@ -287,7 +272,7 @@ static void __devinit fixup_read_and_payload_sizes(void)
* The controllers have been set up by the time we get here, by a call to
* tile_pci_init.
*/
-int __devinit pcibios_init(void)
+int __init pcibios_init(void)
{
int i;
@@ -298,7 +283,7 @@ int __devinit pcibios_init(void)
* known to require at least 20ms here, but we use a more
* conservative value.
*/
- mdelay(250);
+ msleep(250);
/* Scan all of the recorded PCI controllers. */
for (i = 0; i < TILE_NUM_PCIE; i++) {
@@ -310,6 +295,7 @@ int __devinit pcibios_init(void)
if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) {
struct pci_controller *controller = &controllers[i];
struct pci_bus *bus;
+ LIST_HEAD(resources);
if (tile_init_irqs(i, controller)) {
pr_err("PCI: Could not initialize IRQs\n");
@@ -318,18 +304,12 @@ int __devinit pcibios_init(void)
pr_info("PCI: initializing controller #%d\n", i);
- /*
- * This comes from the generic Linux PCI driver.
- *
- * It reads the PCI tree for this bus into the Linux
- * data structures.
- *
- * This is inlined in linux/pci.h and calls into
- * pci_scan_bus_parented() in probe.c.
- */
- bus = pci_scan_bus(0, controller->ops, controller);
+ pci_add_resource(&resources, &ioport_resource);
+ pci_add_resource(&resources, &iomem_resource);
+ bus = pci_scan_root_bus(NULL, 0, controller->ops,
+ controller, &resources);
controller->root_bus = bus;
- controller->last_busno = bus->subordinate;
+ controller->last_busno = bus->busn_res.end;
}
}
@@ -390,7 +370,7 @@ subsys_initcall(pcibios_init);
/*
* No bus fixups needed.
*/
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+void pcibios_fixup_bus(struct pci_bus *bus)
{
/* Nothing needs to be done. */
}
@@ -400,25 +380,17 @@ void pcibios_set_master(struct pci_dev *dev)
/* No special bus mastering setup handling. */
}
-/*
- * This can be called from the generic PCI layer, but doesn't need to
- * do anything.
- */
-char __devinit *pcibios_setup(char *str)
+/* Process any "pci=" kernel boot arguments. */
+char *__init pcibios_setup(char *str)
{
- /* Nothing needs to be done. */
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
return str;
}
/*
- * This is called from the generic Linux layer.
- */
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
-{
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
-/*
* Enable memory and/or address decoding, as appropriate, for the
* device described by the 'dev' struct.
*
@@ -487,11 +459,8 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
* specified bus & slot.
*/
-static int __devinit tile_cfg_read(struct pci_bus *bus,
- unsigned int devfn,
- int offset,
- int size,
- u32 *val)
+static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset,
+ int size, u32 *val)
{
struct pci_controller *controller = bus->sysdata;
int busnum = bus->number & 0xff;
@@ -533,11 +502,8 @@ static int __devinit tile_cfg_read(struct pci_bus *bus,
* See tile_cfg_read() for relevant comments.
* Note that "val" is the value to write, not a pointer to that value.
*/
-static int __devinit tile_cfg_write(struct pci_bus *bus,
- unsigned int devfn,
- int offset,
- int size,
- u32 val)
+static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset,
+ int size, u32 val)
{
struct pci_controller *controller = bus->sysdata;
int busnum = bus->number & 0xff;
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
new file mode 100644
index 00000000000..e39f9c54280
--- /dev/null
+++ b/arch/tile/kernel/pci_gx.c
@@ -0,0 +1,1610 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/capability.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/ctype.h>
+
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/byteorder.h>
+
+#include <gxio/iorpc_globals.h>
+#include <gxio/kiorpc.h>
+#include <gxio/trio.h>
+#include <gxio/iorpc_trio.h>
+#include <hv/drv_trio_intf.h>
+
+#include <arch/sim.h>
+
+/*
+ * This file containes the routines to search for PCI buses,
+ * enumerate the buses, and configure any attached devices.
+ */
+
+#define DEBUG_PCI_CFG 0
+
+#if DEBUG_PCI_CFG
+#define TRACE_CFG_WR(size, val, bus, dev, func, offset) \
+ pr_info("CFG WR %d-byte VAL %#x to bus %d dev %d func %d addr %u\n", \
+ size, val, bus, dev, func, offset & 0xFFF);
+#define TRACE_CFG_RD(size, val, bus, dev, func, offset) \
+ pr_info("CFG RD %d-byte VAL %#x from bus %d dev %d func %d addr %u\n", \
+ size, val, bus, dev, func, offset & 0xFFF);
+#else
+#define TRACE_CFG_WR(...)
+#define TRACE_CFG_RD(...)
+#endif
+
+static int pci_probe = 1;
+
+/* Information on the PCIe RC ports configuration. */
+static int pcie_rc[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
+
+/*
+ * On some platforms with one or more Gx endpoint ports, we need to
+ * delay the PCIe RC port probe for a few seconds to work around
+ * a HW PCIe link-training bug. The exact delay is specified with
+ * a kernel boot argument in the form of "pcie_rc_delay=T,P,S",
+ * where T is the TRIO instance number, P is the port number and S is
+ * the delay in seconds. If the argument is specified, but the delay is
+ * not provided, the value will be DEFAULT_RC_DELAY.
+ */
+static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
+
+/* Default number of seconds that the PCIe RC port probe can be delayed. */
+#define DEFAULT_RC_DELAY 10
+
+/* The PCI I/O space size in each PCI domain. */
+#define IO_SPACE_SIZE 0x10000
+
+/* Provide shorter versions of some very long constant names. */
+#define AUTO_CONFIG_RC \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC
+#define AUTO_CONFIG_RC_G1 \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1
+#define AUTO_CONFIG_EP \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT
+#define AUTO_CONFIG_EP_G1 \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1
+
+/* Array of the PCIe ports configuration info obtained from the BIB. */
+struct pcie_trio_ports_property pcie_ports[TILEGX_NUM_TRIO];
+
+/* Number of configured TRIO instances. */
+int num_trio_shims;
+
+/* All drivers share the TRIO contexts defined here. */
+gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
+
+/* Pointer to an array of PCIe RC controllers. */
+struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
+int num_rc_controllers;
+
+static struct pci_ops tile_cfg_ops;
+
+/* Mask of CPUs that should receive PCIe interrupts. */
+static struct cpumask intr_cpus_map;
+
+/* We don't need to worry about the alignment of resources. */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size,
+ resource_size_t align)
+{
+ return res->start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #.
+ * For now, we simply send interrupts to non-dataplane CPUs.
+ * We may implement methods to allow user to specify the target CPUs,
+ * e.g. via boot arguments.
+ */
+static int tile_irq_cpu(int irq)
+{
+ unsigned int count;
+ int i = 0;
+ int cpu;
+
+ count = cpumask_weight(&intr_cpus_map);
+ if (unlikely(count == 0)) {
+ pr_warning("intr_cpus_map empty, interrupts will be"
+ " delievered to dataplane tiles\n");
+ return irq % (smp_height * smp_width);
+ }
+
+ count = irq % count;
+ for_each_cpu(cpu, &intr_cpus_map) {
+ if (i++ == count)
+ break;
+ }
+ return cpu;
+}
+
+/* Open a file descriptor to the TRIO shim. */
+static int tile_pcie_open(int trio_index)
+{
+ gxio_trio_context_t *context = &trio_contexts[trio_index];
+ int ret;
+ int mac;
+
+ /* This opens a file descriptor to the TRIO shim. */
+ ret = gxio_trio_init(context, trio_index);
+ if (ret < 0)
+ goto gxio_trio_init_failure;
+
+ /* Allocate an ASID for the kernel. */
+ ret = gxio_trio_alloc_asids(context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: ASID alloc failure on TRIO %d, give up\n",
+ trio_index);
+ goto asid_alloc_failure;
+ }
+
+ context->asid = ret;
+
+#ifdef USE_SHARED_PCIE_CONFIG_REGION
+ /*
+ * Alloc a PIO region for config access, shared by all MACs per TRIO.
+ * This shouldn't fail since the kernel is supposed to the first
+ * client of the TRIO's PIO regions.
+ */
+ ret = gxio_trio_alloc_pio_regions(context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: CFG PIO alloc failure on TRIO %d, give up\n",
+ trio_index);
+ goto pio_alloc_failure;
+ }
+
+ context->pio_cfg_index = ret;
+
+ /*
+ * For PIO CFG, the bus_address_hi parameter is 0. The mac parameter
+ * is also 0 because it is specified in PIO_REGION_SETUP_CFG_ADDR.
+ */
+ ret = gxio_trio_init_pio_region_aux(context, context->pio_cfg_index,
+ 0, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
+ if (ret < 0) {
+ pr_err("PCI: CFG PIO init failure on TRIO %d, give up\n",
+ trio_index);
+ goto pio_alloc_failure;
+ }
+#endif
+
+ /* Get the properties of the PCIe ports on this TRIO instance. */
+ ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]);
+ if (ret < 0) {
+ pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d,"
+ " on TRIO %d\n", ret, trio_index);
+ goto get_port_property_failure;
+ }
+
+ context->mmio_base_mac =
+ iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE);
+ if (context->mmio_base_mac == NULL) {
+ pr_err("PCI: TRIO config space mapping failure, error %d,"
+ " on TRIO %d\n", ret, trio_index);
+ ret = -ENOMEM;
+
+ goto trio_mmio_mapping_failure;
+ }
+
+ /* Check the port strap state which will override the BIB setting. */
+ for (mac = 0; mac < TILEGX_TRIO_PCIES; mac++) {
+ TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
+ unsigned int reg_offset;
+
+ /* Ignore ports that are not specified in the BIB. */
+ if (!pcie_ports[trio_index].ports[mac].allow_rc &&
+ !pcie_ports[trio_index].ports[mac].allow_ep)
+ continue;
+
+ reg_offset =
+ (TRIO_PCIE_INTFC_PORT_CONFIG <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ port_config.word =
+ __gxio_mmio_read(context->mmio_base_mac + reg_offset);
+
+ if (port_config.strap_state != AUTO_CONFIG_RC &&
+ port_config.strap_state != AUTO_CONFIG_RC_G1) {
+ /*
+ * If this is really intended to be an EP port, record
+ * it so that the endpoint driver will know about it.
+ */
+ if (port_config.strap_state == AUTO_CONFIG_EP ||
+ port_config.strap_state == AUTO_CONFIG_EP_G1)
+ pcie_ports[trio_index].ports[mac].allow_ep = 1;
+ }
+ }
+
+ return ret;
+
+trio_mmio_mapping_failure:
+get_port_property_failure:
+asid_alloc_failure:
+#ifdef USE_SHARED_PCIE_CONFIG_REGION
+pio_alloc_failure:
+#endif
+ hv_dev_close(context->fd);
+gxio_trio_init_failure:
+ context->fd = -1;
+
+ return ret;
+}
+
+static int __init tile_trio_init(void)
+{
+ int i;
+
+ /* We loop over all the TRIO shims. */
+ for (i = 0; i < TILEGX_NUM_TRIO; i++) {
+ if (tile_pcie_open(i) < 0)
+ continue;
+ num_trio_shims++;
+ }
+
+ return 0;
+}
+postcore_initcall(tile_trio_init);
+
+static void tilegx_legacy_irq_ack(struct irq_data *d)
+{
+ __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
+}
+
+static void tilegx_legacy_irq_mask(struct irq_data *d)
+{
+ __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
+}
+
+static void tilegx_legacy_irq_unmask(struct irq_data *d)
+{
+ __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
+}
+
+static struct irq_chip tilegx_legacy_irq_chip = {
+ .name = "tilegx_legacy_irq",
+ .irq_ack = tilegx_legacy_irq_ack,
+ .irq_mask = tilegx_legacy_irq_mask,
+ .irq_unmask = tilegx_legacy_irq_unmask,
+
+ /* TBD: support set_affinity. */
+};
+
+/*
+ * This is a wrapper function of the kernel level-trigger interrupt
+ * handler handle_level_irq() for PCI legacy interrupts. The TRIO
+ * is configured such that only INTx Assert interrupts are proxied
+ * to Linux which just calls handle_level_irq() after clearing the
+ * MAC INTx Assert status bit associated with this interrupt.
+ */
+static void trio_handle_level_irq(unsigned int irq, struct irq_desc *desc)
+{
+ struct pci_controller *controller = irq_desc_get_handler_data(desc);
+ gxio_trio_context_t *trio_context = controller->trio;
+ uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc);
+ int mac = controller->mac;
+ unsigned int reg_offset;
+ uint64_t level_mask;
+
+ handle_level_irq(irq, desc);
+
+ /*
+ * Clear the INTx Level status, otherwise future interrupts are
+ * not sent.
+ */
+ reg_offset = (TRIO_PCIE_INTFC_MAC_INT_STS <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ level_mask = TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK << intx;
+
+ __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset, level_mask);
+}
+
+/*
+ * Create kernel irqs and set up the handlers for the legacy interrupts.
+ * Also some minimum initialization for the MSI support.
+ */
+static int tile_init_irqs(struct pci_controller *controller)
+{
+ int i;
+ int j;
+ int irq;
+ int result;
+
+ cpumask_copy(&intr_cpus_map, cpu_online_mask);
+
+
+ for (i = 0; i < 4; i++) {
+ gxio_trio_context_t *context = controller->trio;
+ int cpu;
+
+ /* Ask the kernel to allocate an IRQ. */
+ irq = irq_alloc_hwirq(-1);
+ if (!irq) {
+ pr_err("PCI: no free irq vectors, failed for %d\n", i);
+ goto free_irqs;
+ }
+ controller->irq_intx_table[i] = irq;
+
+ /* Distribute the 4 IRQs to different tiles. */
+ cpu = tile_irq_cpu(irq);
+
+ /* Configure the TRIO intr binding for this IRQ. */
+ result = gxio_trio_config_legacy_intr(context, cpu_x(cpu),
+ cpu_y(cpu), KERNEL_PL,
+ irq, controller->mac, i);
+ if (result < 0) {
+ pr_err("PCI: MAC intx config failed for %d\n", i);
+
+ goto free_irqs;
+ }
+
+ /* Register the IRQ handler with the kernel. */
+ irq_set_chip_and_handler(irq, &tilegx_legacy_irq_chip,
+ trio_handle_level_irq);
+ irq_set_chip_data(irq, (void *)(uint64_t)i);
+ irq_set_handler_data(irq, controller);
+ }
+
+ return 0;
+
+free_irqs:
+ for (j = 0; j < i; j++)
+ irq_free_hwirq(controller->irq_intx_table[j]);
+
+ return -1;
+}
+
+/*
+ * Return 1 if the port is strapped to operate in RC mode.
+ */
+static int
+strapped_for_rc(gxio_trio_context_t *trio_context, int mac)
+{
+ TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
+ unsigned int reg_offset;
+
+ /* Check the port configuration. */
+ reg_offset =
+ (TRIO_PCIE_INTFC_PORT_CONFIG <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+ port_config.word =
+ __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset);
+
+ if (port_config.strap_state == AUTO_CONFIG_RC ||
+ port_config.strap_state == AUTO_CONFIG_RC_G1)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Find valid controllers and fill in pci_controller structs for each
+ * of them.
+ *
+ * Return the number of controllers discovered.
+ */
+int __init tile_pci_init(void)
+{
+ int ctl_index = 0;
+ int i, j;
+
+ if (!pci_probe) {
+ pr_info("PCI: disabled by boot argument\n");
+ return 0;
+ }
+
+ pr_info("PCI: Searching for controllers...\n");
+
+ if (num_trio_shims == 0 || sim_is_simulator())
+ return 0;
+
+ /*
+ * Now determine which PCIe ports are configured to operate in RC
+ * mode. There is a differece in the port configuration capability
+ * between the Gx36 and Gx72 devices.
+ *
+ * The Gx36 has configuration capability for each of the 3 PCIe
+ * interfaces (disable, auto endpoint, auto RC, etc.).
+ * On the Gx72, you can only select one of the 3 PCIe interfaces per
+ * TRIO to train automatically. Further, the allowable training modes
+ * are reduced to four options (auto endpoint, auto RC, stream x1,
+ * stream x4).
+ *
+ * For Gx36 ports, it must be allowed to be in RC mode by the
+ * Board Information Block, and the hardware strapping pins must be
+ * set to RC mode.
+ *
+ * For Gx72 ports, the port will operate in RC mode if either of the
+ * following is true:
+ * 1. It is allowed to be in RC mode by the Board Information Block,
+ * and the BIB doesn't allow the EP mode.
+ * 2. It is allowed to be in either the RC or the EP mode by the BIB,
+ * and the hardware strapping pin is set to RC mode.
+ */
+ for (i = 0; i < TILEGX_NUM_TRIO; i++) {
+ gxio_trio_context_t *context = &trio_contexts[i];
+
+ if (context->fd < 0)
+ continue;
+
+ for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
+ int is_rc = 0;
+
+ if (pcie_ports[i].is_gx72 &&
+ pcie_ports[i].ports[j].allow_rc) {
+ if (!pcie_ports[i].ports[j].allow_ep ||
+ strapped_for_rc(context, j))
+ is_rc = 1;
+ } else if (pcie_ports[i].ports[j].allow_rc &&
+ strapped_for_rc(context, j)) {
+ is_rc = 1;
+ }
+ if (is_rc) {
+ pcie_rc[i][j] = 1;
+ num_rc_controllers++;
+ }
+ }
+ }
+
+ /* Return if no PCIe ports are configured to operate in RC mode. */
+ if (num_rc_controllers == 0)
+ return 0;
+
+ /* Set the TRIO pointer and MAC index for each PCIe RC port. */
+ for (i = 0; i < TILEGX_NUM_TRIO; i++) {
+ for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
+ if (pcie_rc[i][j]) {
+ pci_controllers[ctl_index].trio =
+ &trio_contexts[i];
+ pci_controllers[ctl_index].mac = j;
+ pci_controllers[ctl_index].trio_index = i;
+ ctl_index++;
+ if (ctl_index == num_rc_controllers)
+ goto out;
+ }
+ }
+ }
+
+out:
+ /* Configure each PCIe RC port. */
+ for (i = 0; i < num_rc_controllers; i++) {
+
+ /* Configure the PCIe MAC to run in RC mode. */
+ struct pci_controller *controller = &pci_controllers[i];
+
+ controller->index = i;
+ controller->ops = &tile_cfg_ops;
+
+ controller->io_space.start = PCIBIOS_MIN_IO +
+ (i * IO_SPACE_SIZE);
+ controller->io_space.end = controller->io_space.start +
+ IO_SPACE_SIZE - 1;
+ BUG_ON(controller->io_space.end > IO_SPACE_LIMIT);
+ controller->io_space.flags = IORESOURCE_IO;
+ snprintf(controller->io_space_name,
+ sizeof(controller->io_space_name),
+ "PCI I/O domain %d", i);
+ controller->io_space.name = controller->io_space_name;
+
+ /*
+ * The PCI memory resource is located above the PA space.
+ * For every host bridge, the BAR window or the MMIO aperture
+ * is in range [3GB, 4GB - 1] of a 4GB space beyond the
+ * PA space.
+ */
+ controller->mem_offset = TILE_PCI_MEM_START +
+ (i * TILE_PCI_BAR_WINDOW_TOP);
+ controller->mem_space.start = controller->mem_offset +
+ TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE;
+ controller->mem_space.end = controller->mem_offset +
+ TILE_PCI_BAR_WINDOW_TOP - 1;
+ controller->mem_space.flags = IORESOURCE_MEM;
+ snprintf(controller->mem_space_name,
+ sizeof(controller->mem_space_name),
+ "PCI mem domain %d", i);
+ controller->mem_space.name = controller->mem_space_name;
+ }
+
+ return num_rc_controllers;
+}
+
+/*
+ * (pin - 1) converts from the PCI standard's [1:4] convention to
+ * a normal [0:3] range.
+ */
+static int tile_map_irq(const struct pci_dev *dev, u8 device, u8 pin)
+{
+ struct pci_controller *controller =
+ (struct pci_controller *)dev->sysdata;
+ return controller->irq_intx_table[pin - 1];
+}
+
+static void fixup_read_and_payload_sizes(struct pci_controller *controller)
+{
+ gxio_trio_context_t *trio_context = controller->trio;
+ struct pci_bus *root_bus = controller->root_bus;
+ TRIO_PCIE_RC_DEVICE_CONTROL_t dev_control;
+ TRIO_PCIE_RC_DEVICE_CAP_t rc_dev_cap;
+ unsigned int reg_offset;
+ struct pci_bus *child;
+ int mac;
+ int err;
+
+ mac = controller->mac;
+
+ /* Set our max read request size to be 4KB. */
+ reg_offset =
+ (TRIO_PCIE_RC_DEVICE_CONTROL <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
+ reg_offset);
+ dev_control.max_read_req_sz = 5;
+ __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
+ dev_control.word);
+
+ /*
+ * Set the max payload size supported by this Gx PCIe MAC.
+ * Though Gx PCIe supports Max Payload Size of up to 1024 bytes,
+ * experiments have shown that setting MPS to 256 yields the
+ * best performance.
+ */
+ reg_offset =
+ (TRIO_PCIE_RC_DEVICE_CAP <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ rc_dev_cap.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
+ reg_offset);
+ rc_dev_cap.mps_sup = 1;
+ __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
+ rc_dev_cap.word);
+
+ /* Configure PCI Express MPS setting. */
+ list_for_each_entry(child, &root_bus->children, node)
+ pcie_bus_configure_settings(child);
+
+ /*
+ * Set the mac_config register in trio based on the MPS/MRS of the link.
+ */
+ reg_offset =
+ (TRIO_PCIE_RC_DEVICE_CONTROL <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
+ reg_offset);
+
+ err = gxio_trio_set_mps_mrs(trio_context,
+ dev_control.max_payload_size,
+ dev_control.max_read_req_sz,
+ mac);
+ if (err < 0) {
+ pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, "
+ "MAC %d on TRIO %d\n",
+ mac, controller->trio_index);
+ }
+}
+
+static int setup_pcie_rc_delay(char *str)
+{
+ unsigned long delay = 0;
+ unsigned long trio_index;
+ unsigned long mac;
+
+ if (str == NULL || !isdigit(*str))
+ return -EINVAL;
+ trio_index = simple_strtoul(str, (char **)&str, 10);
+ if (trio_index >= TILEGX_NUM_TRIO)
+ return -EINVAL;
+
+ if (*str != ',')
+ return -EINVAL;
+
+ str++;
+ if (!isdigit(*str))
+ return -EINVAL;
+ mac = simple_strtoul(str, (char **)&str, 10);
+ if (mac >= TILEGX_TRIO_PCIES)
+ return -EINVAL;
+
+ if (*str != '\0') {
+ if (*str != ',')
+ return -EINVAL;
+
+ str++;
+ if (!isdigit(*str))
+ return -EINVAL;
+ delay = simple_strtoul(str, (char **)&str, 10);
+ }
+
+ rc_delay[trio_index][mac] = delay ? : DEFAULT_RC_DELAY;
+ return 0;
+}
+early_param("pcie_rc_delay", setup_pcie_rc_delay);
+
+/* PCI initialization entry point, called by subsys_initcall. */
+int __init pcibios_init(void)
+{
+ resource_size_t offset;
+ LIST_HEAD(resources);
+ int next_busno;
+ int i;
+
+ tile_pci_init();
+
+ if (num_rc_controllers == 0)
+ return 0;
+
+ /*
+ * Delay a bit in case devices aren't ready. Some devices are
+ * known to require at least 20ms here, but we use a more
+ * conservative value.
+ */
+ msleep(250);
+
+ /* Scan all of the recorded PCI controllers. */
+ for (next_busno = 0, i = 0; i < num_rc_controllers; i++) {
+ struct pci_controller *controller = &pci_controllers[i];
+ gxio_trio_context_t *trio_context = controller->trio;
+ TRIO_PCIE_INTFC_PORT_STATUS_t port_status;
+ TRIO_PCIE_INTFC_TX_FIFO_CTL_t tx_fifo_ctl;
+ struct pci_bus *bus;
+ unsigned int reg_offset;
+ unsigned int class_code_revision;
+ int trio_index;
+ int mac;
+ int ret;
+
+ if (trio_context->fd < 0)
+ continue;
+
+ trio_index = controller->trio_index;
+ mac = controller->mac;
+
+ /*
+ * Check for PCIe link-up status to decide if we need
+ * to force the link to come up.
+ */
+ reg_offset =
+ (TRIO_PCIE_INTFC_PORT_STATUS <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ port_status.word =
+ __gxio_mmio_read(trio_context->mmio_base_mac +
+ reg_offset);
+ if (!port_status.dl_up) {
+ if (rc_delay[trio_index][mac]) {
+ pr_info("Delaying PCIe RC TRIO init %d sec"
+ " on MAC %d on TRIO %d\n",
+ rc_delay[trio_index][mac], mac,
+ trio_index);
+ msleep(rc_delay[trio_index][mac] * 1000);
+ }
+ ret = gxio_trio_force_rc_link_up(trio_context, mac);
+ if (ret < 0)
+ pr_err("PCI: PCIE_FORCE_LINK_UP failure, "
+ "MAC %d on TRIO %d\n", mac, trio_index);
+ }
+
+ pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i,
+ trio_index, controller->mac);
+
+ /* Delay the bus probe if needed. */
+ if (rc_delay[trio_index][mac]) {
+ pr_info("Delaying PCIe RC bus enumerating %d sec"
+ " on MAC %d on TRIO %d\n",
+ rc_delay[trio_index][mac], mac,
+ trio_index);
+ msleep(rc_delay[trio_index][mac] * 1000);
+ } else {
+ /*
+ * Wait a bit here because some EP devices
+ * take longer to come up.
+ */
+ msleep(1000);
+ }
+
+ /* Check for PCIe link-up status again. */
+ port_status.word =
+ __gxio_mmio_read(trio_context->mmio_base_mac +
+ reg_offset);
+ if (!port_status.dl_up) {
+ if (pcie_ports[trio_index].ports[mac].removable) {
+ pr_info("PCI: link is down, MAC %d on TRIO %d\n",
+ mac, trio_index);
+ pr_info("This is expected if no PCIe card"
+ " is connected to this link\n");
+ } else
+ pr_err("PCI: link is down, MAC %d on TRIO %d\n",
+ mac, trio_index);
+ continue;
+ }
+
+ /*
+ * Ensure that the link can come out of L1 power down state.
+ * Strictly speaking, this is needed only in the case of
+ * heavy RC-initiated DMAs.
+ */
+ reg_offset =
+ (TRIO_PCIE_INTFC_TX_FIFO_CTL <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+ tx_fifo_ctl.word =
+ __gxio_mmio_read(trio_context->mmio_base_mac +
+ reg_offset);
+ tx_fifo_ctl.min_p_credits = 0;
+ __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset,
+ tx_fifo_ctl.word);
+
+ /*
+ * Change the device ID so that Linux bus crawl doesn't confuse
+ * the internal bridge with any Tilera endpoints.
+ */
+ reg_offset =
+ (TRIO_PCIE_RC_DEVICE_ID_VEN_ID <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
+ (TILERA_GX36_RC_DEV_ID <<
+ TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT) |
+ TILERA_VENDOR_ID);
+
+ /* Set the internal P2P bridge class code. */
+ reg_offset =
+ (TRIO_PCIE_RC_REVISION_ID <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ class_code_revision =
+ __gxio_mmio_read32(trio_context->mmio_base_mac +
+ reg_offset);
+ class_code_revision = (class_code_revision & 0xff) |
+ (PCI_CLASS_BRIDGE_PCI << 16);
+
+ __gxio_mmio_write32(trio_context->mmio_base_mac +
+ reg_offset, class_code_revision);
+
+#ifdef USE_SHARED_PCIE_CONFIG_REGION
+
+ /* Map in the MMIO space for the PIO region. */
+ offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index) |
+ (((unsigned long long)mac) <<
+ TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT);
+
+#else
+
+ /* Alloc a PIO region for PCI config access per MAC. */
+ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
+ "on TRIO %d, give up\n", mac, trio_index);
+
+ continue;
+ }
+
+ trio_context->pio_cfg_index[mac] = ret;
+
+ /* For PIO CFG, the bus_address_hi parameter is 0. */
+ ret = gxio_trio_init_pio_region_aux(trio_context,
+ trio_context->pio_cfg_index[mac],
+ mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
+ if (ret < 0) {
+ pr_err("PCI: PCI CFG PIO init failure for mac %d "
+ "on TRIO %d, give up\n", mac, trio_index);
+
+ continue;
+ }
+
+ offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index[mac]) |
+ (((unsigned long long)mac) <<
+ TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT);
+
+#endif
+
+ /*
+ * To save VMALLOC space, we take advantage of the fact that
+ * bit 29 in the PIO CFG address format is reserved 0. With
+ * TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT being 30,
+ * this cuts VMALLOC space usage from 1GB to 512MB per mac.
+ */
+ trio_context->mmio_base_pio_cfg[mac] =
+ iorpc_ioremap(trio_context->fd, offset, (1UL <<
+ (TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1)));
+ if (trio_context->mmio_base_pio_cfg[mac] == NULL) {
+ pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
+ mac, trio_index);
+
+ continue;
+ }
+
+ /* Initialize the PCIe interrupts. */
+ if (tile_init_irqs(controller)) {
+ pr_err("PCI: IRQs init failure for mac %d on TRIO %d\n",
+ mac, trio_index);
+
+ continue;
+ }
+
+ /*
+ * The PCI memory resource is located above the PA space.
+ * The memory range for the PCI root bus should not overlap
+ * with the physical RAM.
+ */
+ pci_add_resource_offset(&resources, &controller->mem_space,
+ controller->mem_offset);
+ pci_add_resource(&resources, &controller->io_space);
+ controller->first_busno = next_busno;
+ bus = pci_scan_root_bus(NULL, next_busno, controller->ops,
+ controller, &resources);
+ controller->root_bus = bus;
+ next_busno = bus->busn_res.end + 1;
+ }
+
+ /* Do machine dependent PCI interrupt routing */
+ pci_fixup_irqs(pci_common_swizzle, tile_map_irq);
+
+ /*
+ * This comes from the generic Linux PCI driver.
+ *
+ * It allocates all of the resources (I/O memory, etc)
+ * associated with the devices read in above.
+ */
+ pci_assign_unassigned_resources();
+
+ /* Record the I/O resources in the PCI controller structure. */
+ for (i = 0; i < num_rc_controllers; i++) {
+ struct pci_controller *controller = &pci_controllers[i];
+ gxio_trio_context_t *trio_context = controller->trio;
+ struct pci_bus *root_bus = pci_controllers[i].root_bus;
+ int ret;
+ int j;
+
+ /*
+ * Skip controllers that are not properly initialized or
+ * have down links.
+ */
+ if (root_bus == NULL)
+ continue;
+
+ /* Configure the max_payload_size values for this domain. */
+ fixup_read_and_payload_sizes(controller);
+
+ /* Alloc a PIO region for PCI memory access for each RC port. */
+ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+
+ controller->pio_mem_index = ret;
+
+ /*
+ * For PIO MEM, the bus_address_hi parameter is hard-coded 0
+ * because we always assign 32-bit PCI bus BAR ranges.
+ */
+ ret = gxio_trio_init_pio_region_aux(trio_context,
+ controller->pio_mem_index,
+ controller->mac,
+ 0,
+ 0);
+ if (ret < 0) {
+ pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+
+#ifdef CONFIG_TILE_PCI_IO
+ /*
+ * Alloc a PIO region for PCI I/O space access for each RC port.
+ */
+ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+
+ controller->pio_io_index = ret;
+
+ /*
+ * For PIO IO, the bus_address_hi parameter is hard-coded 0
+ * because PCI I/O address space is 32-bit.
+ */
+ ret = gxio_trio_init_pio_region_aux(trio_context,
+ controller->pio_io_index,
+ controller->mac,
+ 0,
+ HV_TRIO_PIO_FLAG_IO_SPACE);
+ if (ret < 0) {
+ pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+#endif
+
+ /*
+ * Configure a Mem-Map region for each memory controller so
+ * that Linux can map all of its PA space to the PCI bus.
+ * Use the IOMMU to handle hash-for-home memory.
+ */
+ for_each_online_node(j) {
+ unsigned long start_pfn = node_start_pfn[j];
+ unsigned long end_pfn = node_end_pfn[j];
+ unsigned long nr_pages = end_pfn - start_pfn;
+
+ ret = gxio_trio_alloc_memory_maps(trio_context, 1, 0,
+ 0);
+ if (ret < 0) {
+ pr_err("PCI: Mem-Map alloc failure on TRIO %d "
+ "mac %d for MC %d, give up\n",
+ controller->trio_index,
+ controller->mac, j);
+
+ goto alloc_mem_map_failed;
+ }
+
+ controller->mem_maps[j] = ret;
+
+ /*
+ * Initialize the Mem-Map and the I/O MMU so that all
+ * the physical memory can be accessed by the endpoint
+ * devices. The base bus address is set to the base CPA
+ * of this memory controller plus an offset (see pci.h).
+ * The region's base VA is set to the base CPA. The
+ * I/O MMU table essentially translates the CPA to
+ * the real PA. Implicitly, for node 0, we create
+ * a separate Mem-Map region that serves as the inbound
+ * window for legacy 32-bit devices. This is a direct
+ * map of the low 4GB CPA space.
+ */
+ ret = gxio_trio_init_memory_map_mmu_aux(trio_context,
+ controller->mem_maps[j],
+ start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT,
+ trio_context->asid,
+ controller->mac,
+ (start_pfn << PAGE_SHIFT) +
+ TILE_PCI_MEM_MAP_BASE_OFFSET,
+ j,
+ GXIO_TRIO_ORDER_MODE_UNORDERED);
+ if (ret < 0) {
+ pr_err("PCI: Mem-Map init failure on TRIO %d "
+ "mac %d for MC %d, give up\n",
+ controller->trio_index,
+ controller->mac, j);
+
+ goto alloc_mem_map_failed;
+ }
+ continue;
+
+alloc_mem_map_failed:
+ break;
+ }
+ }
+
+ return 0;
+}
+subsys_initcall(pcibios_init);
+
+/* No bus fixups needed. */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+}
+
+/* Process any "pci=" kernel boot arguments. */
+char *__init pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
+ return str;
+}
+
+/*
+ * Called for each device after PCI setup is done.
+ * We initialize the PCI device capabilities conservatively, assuming that
+ * all devices can only address the 32-bit DMA space. The exception here is
+ * that the device dma_offset is set to the value that matches the 64-bit
+ * capable devices. This is OK because dma_offset is not used by legacy
+ * dma_ops, nor by the hybrid dma_ops's streaming DMAs, which are 64-bit ops.
+ * This implementation matches the kernel design of setting PCI devices'
+ * coherent_dma_mask to 0xffffffffull by default, allowing the device drivers
+ * to skip calling pci_set_consistent_dma_mask(DMA_BIT_MASK(32)).
+ */
+static void pcibios_fixup_final(struct pci_dev *pdev)
+{
+ set_dma_ops(&pdev->dev, gx_legacy_pci_dma_map_ops);
+ set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
+ pdev->dev.archdata.max_direct_dma_addr =
+ TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
+ pdev->dev.coherent_dma_mask = TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
+
+/* Map a PCI MMIO bus address into VA space. */
+void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ struct pci_controller *controller = NULL;
+ resource_size_t bar_start;
+ resource_size_t bar_end;
+ resource_size_t offset;
+ resource_size_t start;
+ resource_size_t end;
+ int trio_fd;
+ int i;
+
+ start = phys_addr;
+ end = phys_addr + size - 1;
+
+ /*
+ * By searching phys_addr in each controller's mem_space, we can
+ * determine the controller that should accept the PCI memory access.
+ */
+ for (i = 0; i < num_rc_controllers; i++) {
+ /*
+ * Skip controllers that are not properly initialized or
+ * have down links.
+ */
+ if (pci_controllers[i].root_bus == NULL)
+ continue;
+
+ bar_start = pci_controllers[i].mem_space.start;
+ bar_end = pci_controllers[i].mem_space.end;
+
+ if ((start >= bar_start) && (end <= bar_end)) {
+ controller = &pci_controllers[i];
+ break;
+ }
+ }
+
+ if (controller == NULL)
+ return NULL;
+
+ trio_fd = controller->trio->fd;
+
+ /* Convert the resource start to the bus address offset. */
+ start = phys_addr - controller->mem_offset;
+
+ offset = HV_TRIO_PIO_OFFSET(controller->pio_mem_index) + start;
+
+ /* We need to keep the PCI bus address's in-page offset in the VA. */
+ return iorpc_ioremap(trio_fd, offset, size) +
+ (start & (PAGE_SIZE - 1));
+}
+EXPORT_SYMBOL(ioremap);
+
+#ifdef CONFIG_TILE_PCI_IO
+/* Map a PCI I/O address into VA space. */
+void __iomem *ioport_map(unsigned long port, unsigned int size)
+{
+ struct pci_controller *controller = NULL;
+ resource_size_t bar_start;
+ resource_size_t bar_end;
+ resource_size_t offset;
+ resource_size_t start;
+ resource_size_t end;
+ int trio_fd;
+ int i;
+
+ start = port;
+ end = port + size - 1;
+
+ /*
+ * By searching the port in each controller's io_space, we can
+ * determine the controller that should accept the PCI I/O access.
+ */
+ for (i = 0; i < num_rc_controllers; i++) {
+ /*
+ * Skip controllers that are not properly initialized or
+ * have down links.
+ */
+ if (pci_controllers[i].root_bus == NULL)
+ continue;
+
+ bar_start = pci_controllers[i].io_space.start;
+ bar_end = pci_controllers[i].io_space.end;
+
+ if ((start >= bar_start) && (end <= bar_end)) {
+ controller = &pci_controllers[i];
+ break;
+ }
+ }
+
+ if (controller == NULL)
+ return NULL;
+
+ trio_fd = controller->trio->fd;
+
+ /* Convert the resource start to the bus address offset. */
+ port -= controller->io_space.start;
+
+ offset = HV_TRIO_PIO_OFFSET(controller->pio_io_index) + port;
+
+ /* We need to keep the PCI bus address's in-page offset in the VA. */
+ return iorpc_ioremap(trio_fd, offset, size) + (port & (PAGE_SIZE - 1));
+}
+EXPORT_SYMBOL(ioport_map);
+
+void ioport_unmap(void __iomem *addr)
+{
+ iounmap(addr);
+}
+EXPORT_SYMBOL(ioport_unmap);
+#endif
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+ iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+/****************************************************************
+ *
+ * Tile PCI config space read/write routines
+ *
+ ****************************************************************/
+
+/*
+ * These are the normal read and write ops
+ * These are expanded with macros from pci_bus_read_config_byte() etc.
+ *
+ * devfn is the combined PCI device & function.
+ *
+ * offset is in bytes, from the start of config space for the
+ * specified bus & device.
+ */
+static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset,
+ int size, u32 *val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ gxio_trio_context_t *trio_context = controller->trio;
+ int busnum = bus->number & 0xff;
+ int device = PCI_SLOT(devfn);
+ int function = PCI_FUNC(devfn);
+ int config_type = 1;
+ TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr;
+ void *mmio_addr;
+
+ /*
+ * Map all accesses to the local device on root bus into the
+ * MMIO space of the MAC. Accesses to the downstream devices
+ * go to the PIO space.
+ */
+ if (pci_is_root_bus(bus)) {
+ if (device == 0) {
+ /*
+ * This is the internal downstream P2P bridge,
+ * access directly.
+ */
+ unsigned int reg_offset;
+
+ reg_offset = ((offset & 0xFFF) <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED
+ << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (controller->mac <<
+ TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ mmio_addr = trio_context->mmio_base_mac + reg_offset;
+
+ goto valid_device;
+
+ } else {
+ /*
+ * We fake an empty device for (device > 0),
+ * since there is only one device on bus 0.
+ */
+ goto invalid_device;
+ }
+ }
+
+ /*
+ * Accesses to the directly attached device have to be
+ * sent as type-0 configs.
+ */
+ if (busnum == (controller->first_busno + 1)) {
+ /*
+ * There is only one device off of our built-in P2P bridge.
+ */
+ if (device != 0)
+ goto invalid_device;
+
+ config_type = 0;
+ }
+
+ cfg_addr.word = 0;
+ cfg_addr.reg_addr = (offset & 0xFFF);
+ cfg_addr.fn = function;
+ cfg_addr.dev = device;
+ cfg_addr.bus = busnum;
+ cfg_addr.type = config_type;
+
+ /*
+ * Note that we don't set the mac field in cfg_addr because the
+ * mapping is per port.
+ */
+ mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
+ cfg_addr.word;
+
+valid_device:
+
+ switch (size) {
+ case 4:
+ *val = __gxio_mmio_read32(mmio_addr);
+ break;
+
+ case 2:
+ *val = __gxio_mmio_read16(mmio_addr);
+ break;
+
+ case 1:
+ *val = __gxio_mmio_read8(mmio_addr);
+ break;
+
+ default:
+ return PCIBIOS_FUNC_NOT_SUPPORTED;
+ }
+
+ TRACE_CFG_RD(size, *val, busnum, device, function, offset);
+
+ return 0;
+
+invalid_device:
+
+ switch (size) {
+ case 4:
+ *val = 0xFFFFFFFF;
+ break;
+
+ case 2:
+ *val = 0xFFFF;
+ break;
+
+ case 1:
+ *val = 0xFF;
+ break;
+
+ default:
+ return PCIBIOS_FUNC_NOT_SUPPORTED;
+ }
+
+ return 0;
+}
+
+
+/*
+ * See tile_cfg_read() for relevent comments.
+ * Note that "val" is the value to write, not a pointer to that value.
+ */
+static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset,
+ int size, u32 val)
+{
+ struct pci_controller *controller = bus->sysdata;
+ gxio_trio_context_t *trio_context = controller->trio;
+ int busnum = bus->number & 0xff;
+ int device = PCI_SLOT(devfn);
+ int function = PCI_FUNC(devfn);
+ int config_type = 1;
+ TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr;
+ void *mmio_addr;
+ u32 val_32 = (u32)val;
+ u16 val_16 = (u16)val;
+ u8 val_8 = (u8)val;
+
+ /*
+ * Map all accesses to the local device on root bus into the
+ * MMIO space of the MAC. Accesses to the downstream devices
+ * go to the PIO space.
+ */
+ if (pci_is_root_bus(bus)) {
+ if (device == 0) {
+ /*
+ * This is the internal downstream P2P bridge,
+ * access directly.
+ */
+ unsigned int reg_offset;
+
+ reg_offset = ((offset & 0xFFF) <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED
+ << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ (controller->mac <<
+ TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ mmio_addr = trio_context->mmio_base_mac + reg_offset;
+
+ goto valid_device;
+
+ } else {
+ /*
+ * We fake an empty device for (device > 0),
+ * since there is only one device on bus 0.
+ */
+ goto invalid_device;
+ }
+ }
+
+ /*
+ * Accesses to the directly attached device have to be
+ * sent as type-0 configs.
+ */
+ if (busnum == (controller->first_busno + 1)) {
+ /*
+ * There is only one device off of our built-in P2P bridge.
+ */
+ if (device != 0)
+ goto invalid_device;
+
+ config_type = 0;
+ }
+
+ cfg_addr.word = 0;
+ cfg_addr.reg_addr = (offset & 0xFFF);
+ cfg_addr.fn = function;
+ cfg_addr.dev = device;
+ cfg_addr.bus = busnum;
+ cfg_addr.type = config_type;
+
+ /*
+ * Note that we don't set the mac field in cfg_addr because the
+ * mapping is per port.
+ */
+ mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
+ cfg_addr.word;
+
+valid_device:
+
+ switch (size) {
+ case 4:
+ __gxio_mmio_write32(mmio_addr, val_32);
+ TRACE_CFG_WR(size, val_32, busnum, device, function, offset);
+ break;
+
+ case 2:
+ __gxio_mmio_write16(mmio_addr, val_16);
+ TRACE_CFG_WR(size, val_16, busnum, device, function, offset);
+ break;
+
+ case 1:
+ __gxio_mmio_write8(mmio_addr, val_8);
+ TRACE_CFG_WR(size, val_8, busnum, device, function, offset);
+ break;
+
+ default:
+ return PCIBIOS_FUNC_NOT_SUPPORTED;
+ }
+
+invalid_device:
+
+ return 0;
+}
+
+
+static struct pci_ops tile_cfg_ops = {
+ .read = tile_cfg_read,
+ .write = tile_cfg_write,
+};
+
+
+/* MSI support starts here. */
+static unsigned int tilegx_msi_startup(struct irq_data *d)
+{
+ if (d->msi_desc)
+ unmask_msi_irq(d);
+
+ return 0;
+}
+
+static void tilegx_msi_ack(struct irq_data *d)
+{
+ __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
+}
+
+static void tilegx_msi_mask(struct irq_data *d)
+{
+ mask_msi_irq(d);
+ __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
+}
+
+static void tilegx_msi_unmask(struct irq_data *d)
+{
+ __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
+ unmask_msi_irq(d);
+}
+
+static struct irq_chip tilegx_msi_chip = {
+ .name = "tilegx_msi",
+ .irq_startup = tilegx_msi_startup,
+ .irq_ack = tilegx_msi_ack,
+ .irq_mask = tilegx_msi_mask,
+ .irq_unmask = tilegx_msi_unmask,
+
+ /* TBD: support set_affinity. */
+};
+
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+ struct pci_controller *controller;
+ gxio_trio_context_t *trio_context;
+ struct msi_msg msg;
+ int default_irq;
+ uint64_t mem_map_base;
+ uint64_t mem_map_limit;
+ u64 msi_addr;
+ int mem_map;
+ int cpu;
+ int irq;
+ int ret;
+
+ irq = irq_alloc_hwirq(-1);
+ if (!irq)
+ return -ENOSPC;
+
+ /*
+ * Since we use a 64-bit Mem-Map to accept the MSI write, we fail
+ * devices that are not capable of generating a 64-bit message address.
+ * These devices will fall back to using the legacy interrupts.
+ * Most PCIe endpoint devices do support 64-bit message addressing.
+ */
+ if (desc->msi_attrib.is_64 == 0) {
+ dev_printk(KERN_INFO, &pdev->dev,
+ "64-bit MSI message address not supported, "
+ "falling back to legacy interrupts.\n");
+
+ ret = -ENOMEM;
+ goto is_64_failure;
+ }
+
+ default_irq = desc->msi_attrib.default_irq;
+ controller = irq_get_handler_data(default_irq);
+
+ BUG_ON(!controller);
+
+ trio_context = controller->trio;
+
+ /*
+ * Allocate a scatter-queue that will accept the MSI write and
+ * trigger the TILE-side interrupts. We use the scatter-queue regions
+ * before the mem map regions, because the latter are needed by more
+ * applications.
+ */
+ mem_map = gxio_trio_alloc_scatter_queues(trio_context, 1, 0, 0);
+ if (mem_map >= 0) {
+ TRIO_MAP_SQ_DOORBELL_FMT_t doorbell_template = {{
+ .pop = 0,
+ .doorbell = 1,
+ }};
+
+ mem_map += TRIO_NUM_MAP_MEM_REGIONS;
+ mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+ mem_map * MEM_MAP_INTR_REGION_SIZE;
+ mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+ msi_addr = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 8;
+ msg.data = (unsigned int)doorbell_template.word;
+ } else {
+ /* SQ regions are out, allocate from map mem regions. */
+ mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
+ if (mem_map < 0) {
+ dev_printk(KERN_INFO, &pdev->dev,
+ "%s Mem-Map alloc failure. "
+ "Failed to initialize MSI interrupts. "
+ "Falling back to legacy interrupts.\n",
+ desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+ ret = -ENOMEM;
+ goto msi_mem_map_alloc_failure;
+ }
+
+ mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+ mem_map * MEM_MAP_INTR_REGION_SIZE;
+ mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+ msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 -
+ TRIO_MAP_MEM_REG_INT0;
+
+ msg.data = mem_map;
+ }
+
+ /* We try to distribute different IRQs to different tiles. */
+ cpu = tile_irq_cpu(irq);
+
+ /*
+ * Now call up to the HV to configure the MSI interrupt and
+ * set up the IPI binding.
+ */
+ ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu),
+ KERNEL_PL, irq, controller->mac,
+ mem_map, mem_map_base, mem_map_limit,
+ trio_context->asid);
+ if (ret < 0) {
+ dev_printk(KERN_INFO, &pdev->dev, "HV MSI config failed.\n");
+
+ goto hv_msi_config_failure;
+ }
+
+ irq_set_msi_desc(irq, desc);
+
+ msg.address_hi = msi_addr >> 32;
+ msg.address_lo = msi_addr & 0xffffffff;
+
+ write_msi_msg(irq, &msg);
+ irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq);
+ irq_set_handler_data(irq, controller);
+
+ return 0;
+
+hv_msi_config_failure:
+ /* Free mem-map */
+msi_mem_map_alloc_failure:
+is_64_failure:
+ irq_free_hwirq(irq);
+ return ret;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+ irq_free_hwirq(irq);
+}
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c
new file mode 100644
index 00000000000..2bf6c9c135c
--- /dev/null
+++ b/arch/tile/kernel/perf_event.c
@@ -0,0 +1,1005 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ *
+ * Perf_events support for Tile processor.
+ *
+ * This code is based upon the x86 perf event
+ * code, which is:
+ *
+ * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2009 Jaswinder Singh Rajput
+ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <linux/atomic.h>
+#include <asm/traps.h>
+#include <asm/stack.h>
+#include <asm/pmc.h>
+#include <hv/hypervisor.h>
+
+#define TILE_MAX_COUNTERS 4
+
+#define PERF_COUNT_0_IDX 0
+#define PERF_COUNT_1_IDX 1
+#define AUX_PERF_COUNT_0_IDX 2
+#define AUX_PERF_COUNT_1_IDX 3
+
+struct cpu_hw_events {
+ int n_events;
+ struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */
+ struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled
+ order */
+ int assign[TILE_MAX_COUNTERS];
+ unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)];
+ unsigned long used_mask;
+};
+
+/* TILE arch specific performance monitor unit */
+struct tile_pmu {
+ const char *name;
+ int version;
+ const int *hw_events; /* generic hw events table */
+ /* generic hw cache events table */
+ const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+ int (*map_hw_event)(u64); /*method used to map
+ hw events */
+ int (*map_cache_event)(u64); /*method used to map
+ cache events */
+
+ u64 max_period; /* max sampling period */
+ u64 cntval_mask; /* counter width mask */
+ int cntval_bits; /* counter width */
+ int max_events; /* max generic hw events
+ in map */
+ int num_counters; /* number base + aux counters */
+ int num_base_counters; /* number base counters */
+};
+
+DEFINE_PER_CPU(u64, perf_irqs);
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+#define TILE_OP_UNSUPP (-1)
+
+#ifndef __tilegx__
+/* TILEPro hardware events map */
+static const int tile_hw_event_map[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP,
+ [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /*
+ MP_CONDITIONAL_BRANCH_ISSUED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /*
+ MP_CONDITIONAL_BRANCH_MISSPREDICT */
+ [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP,
+};
+#else
+/* TILEGx hardware events map */
+static const int tile_hw_event_map[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP,
+ [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /*
+ COND_BRANCH_PRED_CORRECT */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /*
+ COND_BRANCH_PRED_INCORRECT */
+ [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP,
+};
+#endif
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per model basis. A value of -1 means
+ * 'not supported', any other value means the
+ * raw hw_event ID.
+ */
+#ifndef __tilegx__
+/* TILEPro hardware cache event map */
+static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0x21, /* RD_MISS */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0x22, /* WR_MISS */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */
+ [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+};
+#else
+/* TILEGx hardware events map */
+static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ /*
+ * Like some other architectures (e.g. ARM), the performance
+ * counters don't differentiate between read and write
+ * accesses/misses, so this isn't strictly correct, but it's the
+ * best we can do. Writes and reads get combined.
+ */
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0x44, /* RD_MISS */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0x45, /* WR_MISS */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */
+ [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */
+ [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+ [C(RESULT_MISS)] = TILE_OP_UNSUPP,
+ },
+},
+};
+#endif
+
+static atomic_t tile_active_events;
+static DEFINE_MUTEX(perf_intr_reserve_mutex);
+
+static int tile_map_hw_event(u64 config);
+static int tile_map_cache_event(u64 config);
+
+static int tile_pmu_handle_irq(struct pt_regs *regs, int fault);
+
+/*
+ * To avoid new_raw_count getting larger then pre_raw_count
+ * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1.
+ */
+static const struct tile_pmu tilepmu = {
+#ifndef __tilegx__
+ .name = "tilepro",
+#else
+ .name = "tilegx",
+#endif
+ .max_events = ARRAY_SIZE(tile_hw_event_map),
+ .map_hw_event = tile_map_hw_event,
+ .hw_events = tile_hw_event_map,
+ .map_cache_event = tile_map_cache_event,
+ .cache_events = &tile_cache_event_map,
+ .cntval_bits = 32,
+ .cntval_mask = (1ULL << 32) - 1,
+ .max_period = (1ULL << 31) - 1,
+ .num_counters = TILE_MAX_COUNTERS,
+ .num_base_counters = TILE_BASE_COUNTERS,
+};
+
+static const struct tile_pmu *tile_pmu __read_mostly;
+
+/*
+ * Check whether perf event is enabled.
+ */
+int tile_perf_enabled(void)
+{
+ return atomic_read(&tile_active_events) != 0;
+}
+
+/*
+ * Read Performance Counters.
+ */
+static inline u64 read_counter(int idx)
+{
+ u64 val = 0;
+
+ /* __insn_mfspr() only takes an immediate argument */
+ switch (idx) {
+ case PERF_COUNT_0_IDX:
+ val = __insn_mfspr(SPR_PERF_COUNT_0);
+ break;
+ case PERF_COUNT_1_IDX:
+ val = __insn_mfspr(SPR_PERF_COUNT_1);
+ break;
+ case AUX_PERF_COUNT_0_IDX:
+ val = __insn_mfspr(SPR_AUX_PERF_COUNT_0);
+ break;
+ case AUX_PERF_COUNT_1_IDX:
+ val = __insn_mfspr(SPR_AUX_PERF_COUNT_1);
+ break;
+ default:
+ WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX ||
+ idx < PERF_COUNT_0_IDX);
+ }
+
+ return val;
+}
+
+/*
+ * Write Performance Counters.
+ */
+static inline void write_counter(int idx, u64 value)
+{
+ /* __insn_mtspr() only takes an immediate argument */
+ switch (idx) {
+ case PERF_COUNT_0_IDX:
+ __insn_mtspr(SPR_PERF_COUNT_0, value);
+ break;
+ case PERF_COUNT_1_IDX:
+ __insn_mtspr(SPR_PERF_COUNT_1, value);
+ break;
+ case AUX_PERF_COUNT_0_IDX:
+ __insn_mtspr(SPR_AUX_PERF_COUNT_0, value);
+ break;
+ case AUX_PERF_COUNT_1_IDX:
+ __insn_mtspr(SPR_AUX_PERF_COUNT_1, value);
+ break;
+ default:
+ WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX ||
+ idx < PERF_COUNT_0_IDX);
+ }
+}
+
+/*
+ * Enable performance event by setting
+ * Performance Counter Control registers.
+ */
+static inline void tile_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long cfg, mask;
+ int shift, idx = hwc->idx;
+
+ /*
+ * prevent early activation from tile_pmu_start() in hw_perf_enable
+ */
+
+ if (WARN_ON_ONCE(idx == -1))
+ return;
+
+ if (idx < tile_pmu->num_base_counters)
+ cfg = __insn_mfspr(SPR_PERF_COUNT_CTL);
+ else
+ cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL);
+
+ switch (idx) {
+ case PERF_COUNT_0_IDX:
+ case AUX_PERF_COUNT_0_IDX:
+ mask = TILE_EVENT_MASK;
+ shift = 0;
+ break;
+ case PERF_COUNT_1_IDX:
+ case AUX_PERF_COUNT_1_IDX:
+ mask = TILE_EVENT_MASK << 16;
+ shift = 16;
+ break;
+ default:
+ WARN_ON_ONCE(idx < PERF_COUNT_0_IDX ||
+ idx > AUX_PERF_COUNT_1_IDX);
+ return;
+ }
+
+ /* Clear mask bits to enable the event. */
+ cfg &= ~mask;
+ cfg |= hwc->config << shift;
+
+ if (idx < tile_pmu->num_base_counters)
+ __insn_mtspr(SPR_PERF_COUNT_CTL, cfg);
+ else
+ __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg);
+}
+
+/*
+ * Disable performance event by clearing
+ * Performance Counter Control registers.
+ */
+static inline void tile_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long cfg, mask;
+ int idx = hwc->idx;
+
+ if (idx == -1)
+ return;
+
+ if (idx < tile_pmu->num_base_counters)
+ cfg = __insn_mfspr(SPR_PERF_COUNT_CTL);
+ else
+ cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL);
+
+ switch (idx) {
+ case PERF_COUNT_0_IDX:
+ case AUX_PERF_COUNT_0_IDX:
+ mask = TILE_PLM_MASK;
+ break;
+ case PERF_COUNT_1_IDX:
+ case AUX_PERF_COUNT_1_IDX:
+ mask = TILE_PLM_MASK << 16;
+ break;
+ default:
+ WARN_ON_ONCE(idx < PERF_COUNT_0_IDX ||
+ idx > AUX_PERF_COUNT_1_IDX);
+ return;
+ }
+
+ /* Set mask bits to disable the event. */
+ cfg |= mask;
+
+ if (idx < tile_pmu->num_base_counters)
+ __insn_mtspr(SPR_PERF_COUNT_CTL, cfg);
+ else
+ __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg);
+}
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+static u64 tile_perf_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - tile_pmu->cntval_bits;
+ u64 prev_raw_count, new_raw_count;
+ u64 oldval;
+ int idx = hwc->idx;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = read_counter(idx);
+
+ oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count);
+ if (oldval != prev_raw_count)
+ goto again;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
+}
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+static int tile_event_set_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int ret = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+ if (left > tile_pmu->max_period)
+ left = tile_pmu->max_period;
+
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extra future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
+
+ write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask);
+
+ perf_event_update_userpage(event);
+
+ return ret;
+}
+
+/*
+ * Stop the event but do not release the PMU counter
+ */
+static void tile_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (__test_and_clear_bit(idx, cpuc->active_mask)) {
+ tile_pmu_disable_event(event);
+ cpuc->events[hwc->idx] = NULL;
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ tile_perf_event_update(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+/*
+ * Start an event (without re-assigning counter)
+ */
+static void tile_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int idx = event->hw.idx;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(idx == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD) {
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+ tile_event_set_period(event);
+ }
+
+ event->hw.state = 0;
+
+ cpuc->events[idx] = event;
+ __set_bit(idx, cpuc->active_mask);
+
+ unmask_pmc_interrupts();
+
+ tile_pmu_enable_event(event);
+
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Add a single event to the PMU.
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ */
+static int tile_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc;
+ unsigned long mask;
+ int b, max_cnt;
+
+ hwc = &event->hw;
+
+ /*
+ * We are full.
+ */
+ if (cpuc->n_events == tile_pmu->num_counters)
+ return -ENOSPC;
+
+ cpuc->event_list[cpuc->n_events] = event;
+ cpuc->n_events++;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ /*
+ * Find first empty counter.
+ */
+ max_cnt = tile_pmu->num_counters;
+ mask = ~cpuc->used_mask;
+
+ /* Find next free counter. */
+ b = find_next_bit(&mask, max_cnt, 0);
+
+ /* Should not happen. */
+ if (WARN_ON_ONCE(b == max_cnt))
+ return -ENOSPC;
+
+ /*
+ * Assign counter to event.
+ */
+ event->hw.idx = b;
+ __set_bit(b, &cpuc->used_mask);
+
+ /*
+ * Start if requested.
+ */
+ if (flags & PERF_EF_START)
+ tile_pmu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+/*
+ * Delete a single event from the PMU.
+ *
+ * The event is deleted from the group of enabled events.
+ * If it is the last event, disable PMU interrupt.
+ */
+static void tile_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
+
+ /*
+ * Remove event from list, compact list if necessary.
+ */
+ for (i = 0; i < cpuc->n_events; i++) {
+ if (cpuc->event_list[i] == event) {
+ while (++i < cpuc->n_events)
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+ --cpuc->n_events;
+ cpuc->events[event->hw.idx] = NULL;
+ __clear_bit(event->hw.idx, &cpuc->used_mask);
+ tile_pmu_stop(event, PERF_EF_UPDATE);
+ break;
+ }
+ }
+ /*
+ * If there are no events left, then mask PMU interrupt.
+ */
+ if (cpuc->n_events == 0)
+ mask_pmc_interrupts();
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Propagate event elapsed time into the event.
+ */
+static inline void tile_pmu_read(struct perf_event *event)
+{
+ tile_perf_event_update(event);
+}
+
+/*
+ * Map generic events to Tile PMU.
+ */
+static int tile_map_hw_event(u64 config)
+{
+ if (config >= tile_pmu->max_events)
+ return -EINVAL;
+ return tile_pmu->hw_events[config];
+}
+
+/*
+ * Map generic hardware cache events to Tile PMU.
+ */
+static int tile_map_cache_event(u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result;
+ int code;
+
+ if (!tile_pmu->cache_events)
+ return -ENOENT;
+
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result];
+ if (code == TILE_OP_UNSUPP)
+ return -EINVAL;
+
+ return code;
+}
+
+static void tile_event_destroy(struct perf_event *event)
+{
+ if (atomic_dec_return(&tile_active_events) == 0)
+ release_pmc_hardware();
+}
+
+static int __tile_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ int code;
+
+ switch (attr->type) {
+ case PERF_TYPE_HARDWARE:
+ code = tile_pmu->map_hw_event(attr->config);
+ break;
+ case PERF_TYPE_HW_CACHE:
+ code = tile_pmu->map_cache_event(attr->config);
+ break;
+ case PERF_TYPE_RAW:
+ code = attr->config & TILE_EVENT_MASK;
+ break;
+ default:
+ /* Should not happen. */
+ return -EOPNOTSUPP;
+ }
+
+ if (code < 0)
+ return code;
+
+ hwc->config = code;
+ hwc->idx = -1;
+
+ if (attr->exclude_user)
+ hwc->config |= TILE_CTL_EXCL_USER;
+
+ if (attr->exclude_kernel)
+ hwc->config |= TILE_CTL_EXCL_KERNEL;
+
+ if (attr->exclude_hv)
+ hwc->config |= TILE_CTL_EXCL_HV;
+
+ if (!hwc->sample_period) {
+ hwc->sample_period = tile_pmu->max_period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+ event->destroy = tile_event_destroy;
+ return 0;
+}
+
+static int tile_event_init(struct perf_event *event)
+{
+ int err = 0;
+ perf_irq_t old_irq_handler = NULL;
+
+ if (atomic_inc_return(&tile_active_events) == 1)
+ old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq);
+
+ if (old_irq_handler) {
+ pr_warn("PMC hardware busy (reserved by oprofile)\n");
+
+ atomic_dec(&tile_active_events);
+ return -EBUSY;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ err = __tile_event_init(event);
+ if (err) {
+ if (event->destroy)
+ event->destroy(event);
+ }
+ return err;
+}
+
+static struct pmu tilera_pmu = {
+ .event_init = tile_event_init,
+ .add = tile_pmu_add,
+ .del = tile_pmu_del,
+
+ .start = tile_pmu_start,
+ .stop = tile_pmu_stop,
+
+ .read = tile_pmu_read,
+};
+
+/*
+ * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler.
+ */
+int tile_pmu_handle_irq(struct pt_regs *regs, int fault)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ u64 val;
+ unsigned long status;
+ int bit;
+
+ __get_cpu_var(perf_irqs)++;
+
+ if (!atomic_read(&tile_active_events))
+ return 0;
+
+ status = pmc_get_overflow();
+ pmc_ack_overflow(status);
+
+ for_each_set_bit(bit, &status, tile_pmu->num_counters) {
+
+ event = cpuc->events[bit];
+
+ if (!event)
+ continue;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ hwc = &event->hw;
+
+ val = tile_perf_event_update(event);
+ if (val & (1ULL << (tile_pmu->cntval_bits - 1)))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (!tile_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ tile_pmu_stop(event, 0);
+ }
+
+ return 0;
+}
+
+static bool __init supported_pmu(void)
+{
+ tile_pmu = &tilepmu;
+ return true;
+}
+
+int __init init_hw_perf_events(void)
+{
+ supported_pmu();
+ perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW);
+ return 0;
+}
+arch_initcall(init_hw_perf_events);
+
+/* Callchain handling code. */
+
+/*
+ * Tile specific backtracing code for perf_events.
+ */
+static inline void perf_callchain(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ struct KBacktraceIterator kbt;
+ unsigned int i;
+
+ /*
+ * Get the address just after the "jalr" instruction that
+ * jumps to the handler for a syscall. When we find this
+ * address in a backtrace, we silently ignore it, which gives
+ * us a one-step backtrace connection from the sys_xxx()
+ * function in the kernel to the xxx() function in libc.
+ * Otherwise, we lose the ability to properly attribute time
+ * from the libc calls to the kernel implementations, since
+ * oprofile only considers PCs from backtraces a pair at a time.
+ */
+ unsigned long handle_syscall_pc = handle_syscall_link_address();
+
+ KBacktraceIterator_init(&kbt, NULL, regs);
+ kbt.profile = 1;
+
+ /*
+ * The sample for the pc is already recorded. Now we are adding the
+ * address of the callsites on the stack. Our iterator starts
+ * with the frame of the (already sampled) call site. If our
+ * iterator contained a "return address" field, we could have just
+ * used it and wouldn't have needed to skip the first
+ * frame. That's in effect what the arm and x86 versions do.
+ * Instead we peel off the first iteration to get the equivalent
+ * behavior.
+ */
+
+ if (KBacktraceIterator_end(&kbt))
+ return;
+ KBacktraceIterator_next(&kbt);
+
+ /*
+ * Set stack depth to 16 for user and kernel space respectively, that
+ * is, total 32 stack frames.
+ */
+ for (i = 0; i < 16; ++i) {
+ unsigned long pc;
+ if (KBacktraceIterator_end(&kbt))
+ break;
+ pc = kbt.it.pc;
+ if (pc != handle_syscall_pc)
+ perf_callchain_store(entry, pc);
+ KBacktraceIterator_next(&kbt);
+ }
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ perf_callchain(entry, regs);
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ perf_callchain(entry, regs);
+}
diff --git a/arch/tile/kernel/pmc.c b/arch/tile/kernel/pmc.c
new file mode 100644
index 00000000000..db62cc34b95
--- /dev/null
+++ b/arch/tile/kernel/pmc.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/interrupt.h>
+
+#include <asm/processor.h>
+#include <asm/pmc.h>
+
+perf_irq_t perf_irq = NULL;
+int handle_perf_interrupt(struct pt_regs *regs, int fault)
+{
+ int retval;
+
+ if (!perf_irq)
+ panic("Unexpected PERF_COUNT interrupt %d\n", fault);
+
+ nmi_enter();
+ retval = perf_irq(regs, fault);
+ nmi_exit();
+ return retval;
+}
+
+/* Reserve PMC hardware if it is available. */
+perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq)
+{
+ return cmpxchg(&perf_irq, NULL, new_perf_irq);
+}
+EXPORT_SYMBOL(reserve_pmc_hardware);
+
+/* Release PMC hardware. */
+void release_pmc_hardware(void)
+{
+ perf_irq = NULL;
+}
+EXPORT_SYMBOL(release_pmc_hardware);
+
+
+/*
+ * Get current overflow status of each performance counter,
+ * and auxiliary performance counter.
+ */
+unsigned long
+pmc_get_overflow(void)
+{
+ unsigned long status;
+
+ /*
+ * merge base+aux into a single vector
+ */
+ status = __insn_mfspr(SPR_PERF_COUNT_STS);
+ status |= __insn_mfspr(SPR_AUX_PERF_COUNT_STS) << TILE_BASE_COUNTERS;
+ return status;
+}
+
+/*
+ * Clear the status bit for the corresponding counter, if written
+ * with a one.
+ */
+void
+pmc_ack_overflow(unsigned long status)
+{
+ /*
+ * clear overflow status by writing ones
+ */
+ __insn_mtspr(SPR_PERF_COUNT_STS, status);
+ __insn_mtspr(SPR_AUX_PERF_COUNT_STS, status >> TILE_BASE_COUNTERS);
+}
+
+/*
+ * The perf count interrupts are masked and unmasked explicitly,
+ * and only here. The normal irq_enable() does not enable them,
+ * and irq_disable() does not disable them. That lets these
+ * routines drive the perf count interrupts orthogonally.
+ *
+ * We also mask the perf count interrupts on entry to the perf count
+ * interrupt handler in assembly code, and by default unmask them
+ * again (with interrupt critical section protection) just before
+ * returning from the interrupt. If the perf count handler returns
+ * a non-zero error code, then we don't re-enable them before returning.
+ *
+ * For Pro, we rely on both interrupts being in the same word to update
+ * them atomically so we never have one enabled and one disabled.
+ */
+
+#if CHIP_HAS_SPLIT_INTR_MASK()
+# if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32
+# error Fix assumptions about which word PERF_COUNT interrupts are in
+# endif
+#endif
+
+static inline unsigned long long pmc_mask(void)
+{
+ unsigned long long mask = 1ULL << INT_PERF_COUNT;
+ mask |= 1ULL << INT_AUX_PERF_COUNT;
+ return mask;
+}
+
+void unmask_pmc_interrupts(void)
+{
+ interrupt_mask_reset_mask(pmc_mask());
+}
+
+void mask_pmc_interrupts(void)
+{
+ interrupt_mask_set_mask(pmc_mask());
+}
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 62d820833c6..6829a950864 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -22,7 +22,9 @@
#include <linux/proc_fs.h>
#include <linux/sysctl.h>
#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
#include <linux/mman.h>
+#include <asm/unaligned.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/sections.h>
@@ -111,8 +113,7 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory
*/
-#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
-static ctl_table unaligned_subtable[] = {
+static struct ctl_table unaligned_subtable[] = {
{
.procname = "enabled",
.data = &unaligned_fixup,
@@ -137,7 +138,7 @@ static ctl_table unaligned_subtable[] = {
{}
};
-static ctl_table unaligned_table[] = {
+static struct ctl_table unaligned_table[] = {
{
.procname = "unaligned_fixup",
.mode = 0555,
@@ -145,7 +146,6 @@ static ctl_table unaligned_table[] = {
},
{}
};
-#endif
static struct ctl_path tile_path[] = {
{ .procname = "tile" },
@@ -154,9 +154,7 @@ static struct ctl_path tile_path[] = {
static int __init proc_sys_tile_init(void)
{
-#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
register_sysctl_paths(tile_path, unaligned_table);
-#endif
return 0;
}
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 4c1ac6e5347..16ed5894875 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,24 +27,25 @@
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/signal.h>
-#include <asm/system.h>
#include <asm/stack.h>
+#include <asm/switch_to.h>
#include <asm/homecache.h>
#include <asm/syscalls.h>
#include <asm/traps.h>
+#include <asm/setup.h>
+#include <asm/uaccess.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
#include <arch/chip.h>
#include <arch/abi.h>
-
+#include <arch/sim_def.h>
/*
* Use the (x86) "idle=poll" option to prefer low latency when leaving the
* idle loop over low power while in the idle loop, e.g. if we have
* one thread per core and we want to get threads out of futex waits fast.
*/
-static int no_idle_nap;
static int __init idle_setup(char *str)
{
if (!str)
@@ -52,105 +53,28 @@ static int __init idle_setup(char *str)
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads.\n");
- no_idle_nap = 1;
- } else if (!strcmp(str, "halt"))
- no_idle_nap = 0;
- else
- return -1;
-
- return 0;
-}
-early_param("idle", idle_setup);
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
- int cpu = smp_processor_id();
-
-
- current_thread_info()->status |= TS_POLLING;
-
- if (no_idle_nap) {
- while (1) {
- while (!need_resched())
- cpu_relax();
- schedule();
- }
- }
-
- /* endless idle loop with no priority at all */
- while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
- while (!need_resched()) {
- if (cpu_is_offline(cpu))
- BUG(); /* no HOTPLUG_CPU */
-
- local_irq_disable();
- __get_cpu_var(irq_stat).idle_timestamp = jiffies;
- current_thread_info()->status &= ~TS_POLLING;
- /*
- * TS_POLLING-cleared state must be visible before we
- * test NEED_RESCHED:
- */
- smp_mb();
-
- if (!need_resched())
- _cpu_idle();
- else
- local_irq_enable();
- current_thread_info()->status |= TS_POLLING;
- }
- rcu_idle_exit();
- tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ cpu_idle_poll_ctrl(true);
+ return 0;
+ } else if (!strcmp(str, "halt")) {
+ return 0;
}
+ return -1;
}
+early_param("idle", idle_setup);
-struct thread_info *alloc_thread_info_node(struct task_struct *task, int node)
+void arch_cpu_idle(void)
{
- struct page *page;
- gfp_t flags = GFP_KERNEL;
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
- flags |= __GFP_ZERO;
-#endif
-
- page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER);
- if (!page)
- return NULL;
-
- return (struct thread_info *)page_address(page);
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ _cpu_idle();
}
/*
- * Free a thread_info node, and all of its derivative
- * data structures.
+ * Release a thread_info structure
*/
-void free_thread_info(struct thread_info *info)
+void arch_release_thread_info(struct thread_info *info)
{
struct single_step_state *step_state = info->step_state;
-#ifdef CONFIG_HARDWALL
- /*
- * We free a thread_info from the context of the task that has
- * been scheduled next, so the original task is already dead.
- * Calling deactivate here just frees up the data structures.
- * If the task we're freeing held the last reference to a
- * hardwall fd, it would have been released prior to this point
- * anyway via exit_files(), and "hardwall" would be NULL by now.
- */
- if (info->task->thread.hardwall)
- hardwall_deactivate(info->task);
-#endif
-
if (step_state) {
/*
@@ -169,25 +93,54 @@ void free_thread_info(struct thread_info *info)
*/
kfree(step_state);
}
-
- free_pages((unsigned long)info, THREAD_SIZE_ORDER);
}
static void save_arch_state(struct thread_struct *t);
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long stack_size,
- struct task_struct *p, struct pt_regs *regs)
+ unsigned long arg, struct task_struct *p)
{
- struct pt_regs *childregs;
+ struct pt_regs *childregs = task_pt_regs(p);
unsigned long ksp;
+ unsigned long *callee_regs;
+
+ /*
+ * Set up the stack and stack pointer appropriately for the
+ * new child to find itself woken up in __switch_to().
+ * The callee-saved registers must be on the stack to be read;
+ * the new task will then jump to assembly support to handle
+ * calling schedule_tail(), etc., and (for userspace tasks)
+ * returning to the context set up in the pt_regs.
+ */
+ ksp = (unsigned long) childregs;
+ ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */
+ ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+ ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
+ callee_regs = (unsigned long *)ksp;
+ ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */
+ ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+ p->thread.ksp = ksp;
+
+ /* Record the pid of the task that created this one. */
+ p->thread.creator_pid = current->pid;
+
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ memset(&callee_regs[2], 0,
+ (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
+ callee_regs[0] = sp; /* r30 = function */
+ callee_regs[1] = arg; /* r31 = arg */
+ childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
+ p->thread.pc = (unsigned long) ret_from_kernel_thread;
+ return 0;
+ }
/*
- * When creating a new kernel thread we pass sp as zero.
- * Assign it to a reasonable value now that we have the stack.
+ * Start new thread in ret_from_fork so it schedules properly
+ * and then return from interrupt like the parent.
*/
- if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0))
- sp = KSTK_TOP(p);
+ p->thread.pc = (unsigned long) ret_from_fork;
/*
* Do not clone step state from the parent; each thread
@@ -195,52 +148,35 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
task_thread_info(p)->step_state = NULL;
+#ifdef __tilegx__
/*
- * Start new thread in ret_from_fork so it schedules properly
- * and then return from interrupt like the parent.
+ * Do not clone unalign jit fixup from the parent; each thread
+ * must allocate its own on demand.
*/
- p->thread.pc = (unsigned long) ret_from_fork;
-
- /* Save user stack top pointer so we can ID the stack vm area later. */
- p->thread.usp0 = sp;
-
- /* Record the pid of the process that created this one. */
- p->thread.creator_pid = current->pid;
+ task_thread_info(p)->unalign_jit_base = NULL;
+#endif
/*
* Copy the registers onto the kernel stack so the
* return-from-interrupt code will reload it into registers.
*/
- childregs = task_pt_regs(p);
- *childregs = *regs;
+ *childregs = *current_pt_regs();
childregs->regs[0] = 0; /* return value is zero */
- childregs->sp = sp; /* override with new user stack pointer */
+ if (sp)
+ childregs->sp = sp; /* override with new user stack pointer */
+ memcpy(callee_regs, &childregs->regs[CALLEE_SAVED_FIRST_REG],
+ CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
+
+ /* Save user stack top pointer so we can ID the stack vm area later. */
+ p->thread.usp0 = childregs->sp;
/*
* If CLONE_SETTLS is set, set "tp" in the new task to "r4",
* which is passed in as arg #5 to sys_clone().
*/
if (clone_flags & CLONE_SETTLS)
- childregs->tp = regs->regs[4];
+ childregs->tp = childregs->regs[4];
- /*
- * Copy the callee-saved registers from the passed pt_regs struct
- * into the context-switch callee-saved registers area.
- * This way when we start the interrupt-return sequence, the
- * callee-save registers will be correctly in registers, which
- * is how we assume the compiler leaves them as we start doing
- * the normal return-from-interrupt path after calling C code.
- * Zero out the C ABI save area to mark the top of the stack.
- */
- ksp = (unsigned long) childregs;
- ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */
- ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
- ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
- memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG],
- CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
- ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */
- ((long *)ksp)[0] = ((long *)ksp)[1] = 0;
- p->thread.ksp = ksp;
#if CHIP_HAS_TILE_DMA()
/*
@@ -251,20 +187,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
#endif
-#if CHIP_HAS_SN_PROC()
- /* Likewise, the new thread is not running static processor code. */
- p->thread.sn_proc_running = 0;
- memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
-#endif
-
-#if CHIP_HAS_PROC_STATUS_SPR()
/* New thread has its miscellaneous processor state bits clear. */
p->thread.proc_status = 0;
-#endif
#ifdef CONFIG_HARDWALL
/* New thread does not own any networks. */
- p->thread.hardwall = NULL;
+ memset(&p->thread.hardwall[0], 0,
+ sizeof(struct hardwall_task) * HARDWALL_TYPES);
#endif
@@ -277,19 +206,32 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
return 0;
}
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+ task_thread_info(tsk)->align_ctl = val;
+ return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+ return put_user(task_thread_info(tsk)->align_ctl,
+ (unsigned int __user *)adr);
+}
+
+static struct task_struct corrupt_current = { .comm = "<corrupt>" };
+
/*
* Return "current" if it looks plausible, or else a pointer to a dummy.
* This can be helpful if we are just trying to emit a clean panic.
*/
struct task_struct *validate_current(void)
{
- static struct task_struct corrupt = { .comm = "<corrupt>" };
struct task_struct *tsk = current;
if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
- (void *)tsk > high_memory ||
+ (high_memory && (void *)tsk > high_memory) ||
((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
- tsk = &corrupt;
+ tsk = &corrupt_current;
}
return tsk;
}
@@ -428,15 +370,11 @@ static void save_arch_state(struct thread_struct *t)
t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
-#if CHIP_HAS_PROC_STATUS_SPR()
t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
-#endif
#if CHIP_HAS_DSTREAM_PF()
t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
#endif
@@ -457,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t)
__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
-#if CHIP_HAS_PROC_STATUS_SPR()
__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
__insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
__insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
-#endif
#if CHIP_HAS_DSTREAM_PF()
__insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
#endif
@@ -474,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t)
void _prepare_arch_switch(struct task_struct *next)
{
-#if CHIP_HAS_SN_PROC()
- int snctl;
-#endif
#if CHIP_HAS_TILE_DMA()
struct tile_dma_state *dma = &current->thread.tile_dma_state;
if (dma->enabled)
save_tile_dma_state(dma);
#endif
-#if CHIP_HAS_SN_PROC()
- /*
- * Suspend the static network processor if it was running.
- * We do not suspend the fabric itself, just like we don't
- * try to suspend the UDN.
- */
- snctl = __insn_mfspr(SPR_SNCTL);
- current->thread.sn_proc_running =
- (snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
- if (current->thread.sn_proc_running)
- __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
-#endif
}
@@ -521,25 +440,9 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
/* Restore other arch state. */
restore_arch_state(&next->thread);
-#if CHIP_HAS_SN_PROC()
- /*
- * Restart static network processor in the new process
- * if it was running before.
- */
- if (next->thread.sn_proc_running) {
- int snctl = __insn_mfspr(SPR_SNCTL);
- __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
- }
-#endif
-
#ifdef CONFIG_HARDWALL
/* Enable or disable access to the network registers appropriately. */
- if (prev->thread.hardwall != NULL) {
- if (next->thread.hardwall == NULL)
- restrict_network_mpls();
- } else if (next->thread.hardwall != NULL) {
- grant_network_mpls();
- }
+ hardwall_switch_tasks(prev, next);
#endif
/*
@@ -567,11 +470,18 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
*/
int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
{
+ /* If we enter in kernel mode, do nothing and exit the caller loop. */
+ if (!user_mode(regs))
+ return 0;
+
+ /* Enable interrupts; they are disabled again on return to caller. */
+ local_irq_enable();
+
if (thread_info_flags & _TIF_NEED_RESCHED) {
schedule();
return 1;
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
if (thread_info_flags & _TIF_ASYNC_TLB) {
do_async_page_fault(regs);
return 1;
@@ -584,74 +494,15 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- if (current->replacement_session_keyring)
- key_replace_session_keyring();
return 1;
}
if (thread_info_flags & _TIF_SINGLESTEP) {
- if ((regs->ex1 & SPR_EX_CONTEXT_1_1__PL_MASK) == 0)
- single_step_once(regs);
+ single_step_once(regs);
return 0;
}
panic("work_pending: bad flags %#x\n", thread_info_flags);
}
-/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */
-SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
- void __user *, parent_tidptr, void __user *, child_tidptr,
- struct pt_regs *, regs)
-{
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0,
- parent_tidptr, child_tidptr);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-SYSCALL_DEFINE4(execve, const char __user *, path,
- const char __user *const __user *, argv,
- const char __user *const __user *, envp,
- struct pt_regs *, regs)
-{
- long error;
- char *filename;
-
- filename = getname(path);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = do_execve(filename, argv, envp, regs);
- putname(filename);
- if (error == 0)
- single_step_execve();
-out:
- return error;
-}
-
-#ifdef CONFIG_COMPAT
-long compat_sys_execve(const char __user *path,
- compat_uptr_t __user *argv,
- compat_uptr_t __user *envp,
- struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(path);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = compat_do_execve(filename, argv, envp, regs);
- putname(filename);
- if (error == 0)
- single_step_execve();
-out:
- return error;
-}
-#endif
-
unsigned long get_wchan(struct task_struct *p)
{
struct KBacktraceIterator kbt;
@@ -669,37 +520,6 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
-/*
- * We pass in lr as zero (cleared in kernel_thread) and the caller
- * part of the backtrace ABI on the stack also zeroed (in copy_thread)
- * so that backtraces will stop with this function.
- * Note that we don't use r0, since copy_thread() clears it.
- */
-static void start_kernel_thread(int dummy, int (*fn)(int), int arg)
-{
- do_exit(fn(arg));
-}
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
- regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0); /* run at kernel PL, no ICS */
- regs.pc = (long) start_kernel_thread;
- regs.flags = PT_FLAGS_CALLER_SAVES; /* need to restore r1 and r2 */
- regs.regs[1] = (long) fn; /* function pointer */
- regs.regs[2] = (long) arg; /* parameter register */
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs,
- 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
/* Flush thread state. */
void flush_thread(void)
{
@@ -711,7 +531,15 @@ void flush_thread(void)
*/
void exit_thread(void)
{
- /* Nothing */
+#ifdef CONFIG_HARDWALL
+ /*
+ * Remove the task from the list of tasks that are associated
+ * with any live hardwalls. (If the task that is exiting held
+ * the last reference to a hardwall fd, it would already have
+ * been released and deactivated at this point.)
+ */
+ hardwall_deactivate_all(current);
+#endif
}
void show_regs(struct pt_regs *regs)
@@ -720,24 +548,24 @@ void show_regs(struct pt_regs *regs)
int i;
pr_err("\n");
- pr_err(" Pid: %d, comm: %20s, CPU: %d\n",
- tsk->pid, tsk->comm, smp_processor_id());
+ if (tsk != &corrupt_current)
+ show_regs_print_info(KERN_ERR);
#ifdef __tilegx__
- for (i = 0; i < 51; i += 3)
+ for (i = 0; i < 17; i++)
pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
- i, regs->regs[i], i+1, regs->regs[i+1],
- i+2, regs->regs[i+2]);
- pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n",
- regs->regs[51], regs->regs[52], regs->tp);
+ i, regs->regs[i], i+18, regs->regs[i+18],
+ i+36, regs->regs[i+36]);
+ pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
+ regs->regs[17], regs->regs[35], regs->tp);
pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
#else
- for (i = 0; i < 52; i += 4)
+ for (i = 0; i < 13; i++)
pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
- i, regs->regs[i], i+1, regs->regs[i+1],
- i+2, regs->regs[i+2], i+3, regs->regs[i+3]);
- pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
- regs->regs[52], regs->tp, regs->sp, regs->lr);
+ i, regs->regs[i], i+14, regs->regs[i+14],
+ i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
+ pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
+ regs->regs[13], regs->tp, regs->sp, regs->lr);
#endif
pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n",
regs->pc, regs->ex1, regs->faultnum);
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index e92e40527d6..de98c6ddf13 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -19,7 +19,14 @@
#include <linux/kprobes.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
#include <asm/traps.h>
+#include <arch/chip.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
void user_enable_single_step(struct task_struct *child)
{
@@ -45,6 +52,100 @@ void ptrace_disable(struct task_struct *child)
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
+/*
+ * Get registers from task and ready the result for userspace.
+ * Note that we localize the API issues to getregs() and putregs() at
+ * some cost in performance, e.g. we need a full pt_regs copy for
+ * PEEKUSR, and two copies for POKEUSR. But in general we expect
+ * GETREGS/PUTREGS to be the API of choice anyway.
+ */
+static char *getregs(struct task_struct *child, struct pt_regs *uregs)
+{
+ *uregs = *task_pt_regs(child);
+
+ /* Set up flags ABI bits. */
+ uregs->flags = 0;
+#ifdef CONFIG_COMPAT
+ if (task_thread_info(child)->status & TS_COMPAT)
+ uregs->flags |= PT_FLAGS_COMPAT;
+#endif
+
+ return (char *)uregs;
+}
+
+/* Put registers back to task. */
+static void putregs(struct task_struct *child, struct pt_regs *uregs)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+
+ /* Don't allow overwriting the kernel-internal flags word. */
+ uregs->flags = regs->flags;
+
+ /* Only allow setting the ICS bit in the ex1 word. */
+ uregs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(uregs->ex1));
+
+ *regs = *uregs;
+}
+
+enum tile_regset {
+ REGSET_GPR,
+};
+
+static int tile_gpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct pt_regs regs;
+
+ getregs(target, &regs);
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &regs, 0,
+ sizeof(regs));
+}
+
+static int tile_gpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct pt_regs regs;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &regs, 0,
+ sizeof(regs));
+ if (ret)
+ return ret;
+
+ putregs(target, &regs);
+
+ return 0;
+}
+
+static const struct user_regset tile_user_regset[] = {
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG,
+ .size = sizeof(elf_greg_t),
+ .align = sizeof(elf_greg_t),
+ .get = tile_gpr_get,
+ .set = tile_gpr_set,
+ },
+};
+
+static const struct user_regset_view tile_user_regset_view = {
+ .name = CHIP_ARCH_NAME,
+ .e_machine = ELF_ARCH,
+ .ei_osabi = ELF_OSABI,
+ .regsets = tile_user_regset,
+ .n = ARRAY_SIZE(tile_user_regset),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ return &tile_user_regset_view;
+}
+
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
@@ -53,14 +154,13 @@ long arch_ptrace(struct task_struct *child, long request,
long ret = -EIO;
char *childreg;
struct pt_regs copyregs;
- int ex1_offset;
switch (request) {
case PTRACE_PEEKUSR: /* Read register from pt_regs. */
if (addr >= PTREGS_SIZE)
break;
- childreg = (char *)task_pt_regs(child) + addr;
+ childreg = getregs(child, &copyregs) + addr;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
if (addr & (sizeof(compat_long_t)-1))
@@ -79,17 +179,7 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_POKEUSR: /* Write register in pt_regs. */
if (addr >= PTREGS_SIZE)
break;
- childreg = (char *)task_pt_regs(child) + addr;
-
- /* Guard against overwrites of the privilege level. */
- ex1_offset = PTREGS_OFFSET_EX1;
-#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN)
- if (is_compat_task()) /* point at low word */
- ex1_offset += sizeof(compat_long_t);
-#endif
- if (addr == ex1_offset)
- data = PL_ICS_EX1(USER_PL, EX1_ICS(data));
-
+ childreg = getregs(child, &copyregs) + addr;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
if (addr & (sizeof(compat_long_t)-1))
@@ -102,24 +192,20 @@ long arch_ptrace(struct task_struct *child, long request,
break;
*(long *)childreg = data;
}
+ putregs(child, &copyregs);
ret = 0;
break;
case PTRACE_GETREGS: /* Get all registers from the child. */
- if (copy_to_user(datap, task_pt_regs(child),
- sizeof(struct pt_regs)) == 0) {
- ret = 0;
- }
+ ret = copy_regset_to_user(child, &tile_user_regset_view,
+ REGSET_GPR, 0,
+ sizeof(struct pt_regs), datap);
break;
case PTRACE_SETREGS: /* Set all registers in the child. */
- if (copy_from_user(&copyregs, datap,
- sizeof(struct pt_regs)) == 0) {
- copyregs.ex1 =
- PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1));
- *task_pt_regs(child) = copyregs;
- ret = 0;
- }
+ ret = copy_regset_from_user(child, &tile_user_regset_view,
+ REGSET_GPR, 0,
+ sizeof(struct pt_regs), datap);
break;
case PTRACE_GETFPREGS: /* Get the child FPU state. */
@@ -128,12 +214,16 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_SETOPTIONS:
/* Support TILE-specific ptrace options. */
- child->ptrace &= ~PT_TRACE_MASK_TILE;
+ BUILD_BUG_ON(PTRACE_O_MASK_TILE & PTRACE_O_MASK);
tmp = data & PTRACE_O_MASK_TILE;
data &= ~PTRACE_O_MASK_TILE;
ret = ptrace_request(child, request, addr, data);
- if (tmp & PTRACE_O_TRACEMIGRATE)
- child->ptrace |= PT_TRACE_MIGRATE;
+ if (ret == 0) {
+ unsigned int flags = child->ptrace;
+ flags &= ~(PTRACE_O_MASK_TILE << PT_OPT_FLAG_SHIFT);
+ flags |= (tmp << PT_OPT_FLAG_SHIFT);
+ child->ptrace = flags;
+ }
break;
default:
@@ -160,32 +250,44 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
}
#endif
-void do_syscall_trace(void)
+int do_syscall_trace_enter(struct pt_regs *regs)
{
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return;
+ if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+ if (tracehook_report_syscall_entry(regs))
+ regs->regs[TREG_SYSCALL_NR] = -1;
+ }
- if (!(current->ptrace & PT_PTRACED))
- return;
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, regs->regs[TREG_SYSCALL_NR]);
- /*
- * The 0x80 provides a way for the tracing parent to distinguish
- * between a syscall stop and SIGTRAP delivery
- */
- ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
+ return regs->regs[TREG_SYSCALL_NR];
+}
+
+void do_syscall_trace_exit(struct pt_regs *regs)
+{
+ long errno;
/*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
+ * The standard tile calling convention returns the value (or negative
+ * errno) in r0, and zero (or positive errno) in r1.
+ * It saves a couple of cycles on the hot path to do this work in
+ * registers only as we return, rather than updating the in-memory
+ * struct ptregs.
*/
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ errno = (long) regs->regs[0];
+ if (errno < 0 && errno > -4096)
+ regs->regs[1] = -errno;
+ else
+ regs->regs[1] = 0;
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs->regs[0]);
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
{
struct siginfo info;
@@ -201,5 +303,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* Handle synthetic interrupt delivered only by the simulator. */
void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
{
- send_sigtrap(current, regs, fault_num);
+ send_sigtrap(current, regs);
}
diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c
index baa3d905fee..6c5d2c070a1 100644
--- a/arch/tile/kernel/reboot.c
+++ b/arch/tile/kernel/reboot.c
@@ -16,6 +16,7 @@
#include <linux/reboot.h>
#include <linux/smp.h>
#include <linux/pm.h>
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/setup.h>
#include <hv/hypervisor.h>
@@ -26,7 +27,6 @@
void machine_halt(void)
{
- warn_early_printk();
arch_local_irq_disable_all();
smp_send_stop();
hv_halt();
@@ -34,7 +34,6 @@ void machine_halt(void)
void machine_power_off(void)
{
- warn_early_printk();
arch_local_irq_disable_all();
smp_send_stop();
hv_power_off();
@@ -49,3 +48,4 @@ void machine_restart(char *cmd)
/* No interesting distinction to be made here. */
void (*pm_power_off)(void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S
index caa13101c26..542cae17a93 100644
--- a/arch/tile/kernel/regs_32.S
+++ b/arch/tile/kernel/regs_32.S
@@ -13,14 +13,14 @@
*/
#include <linux/linkage.h>
-#include <asm/system.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <arch/spr_def.h>
#include <asm/processor.h>
+#include <asm/switch_to.h>
/*
- * See <asm/system.h>; called with prev and next task_struct pointers.
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
* "prev" is returned in r0 for _switch_to and also for ret_from_fork.
*
* We want to save pc/sp in "prev", and get the new pc/sp from "next".
@@ -39,7 +39,7 @@
*/
#if CALLEE_SAVED_REGS_COUNT != 24
-# error Mismatch between <asm/system.h> and kernel/entry.S
+# error Mismatch between <asm/switch_to.h> and kernel/entry.S
#endif
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4)
diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S
index f748c1e8528..bbffcc6f340 100644
--- a/arch/tile/kernel/regs_64.S
+++ b/arch/tile/kernel/regs_64.S
@@ -13,14 +13,14 @@
*/
#include <linux/linkage.h>
-#include <asm/system.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <arch/spr_def.h>
#include <asm/processor.h>
+#include <asm/switch_to.h>
/*
- * See <asm/system.h>; called with prev and next task_struct pointers.
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
* "prev" is returned in r0 for _switch_to and also for ret_from_fork.
*
* We want to save pc/sp in "prev", and get the new pc/sp from "next".
@@ -39,7 +39,7 @@
*/
#if CALLEE_SAVED_REGS_COUNT != 24
-# error Mismatch between <asm/system.h> and kernel/entry.S
+# error Mismatch between <asm/switch_to.h> and kernel/entry.S
#endif
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8)
diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel_32.S
index 010b418515f..e44fbcf8cbd 100644
--- a/arch/tile/kernel/relocate_kernel.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
@@ -20,15 +20,6 @@
#include <asm/page.h>
#include <hv/hypervisor.h>
-#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA
-
-#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f))
-
-#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC)
-#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT)
-#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC)
-#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE)
-
#undef RELOCATE_NEW_KERNEL_VERBOSE
STD_ENTRY(relocate_new_kernel)
@@ -43,8 +34,8 @@ STD_ENTRY(relocate_new_kernel)
addi sp, sp, -8
/* we now have a stack (whether we need one or not) */
- moveli r40, lo16(___hv_console_putc)
- auli r40, r40, ha16(___hv_console_putc)
+ moveli r40, lo16(hv_console_putc)
+ auli r40, r40, ha16(hv_console_putc)
#ifdef RELOCATE_NEW_KERNEL_VERBOSE
moveli r0, 'r'
@@ -86,7 +77,6 @@ STD_ENTRY(relocate_new_kernel)
move r30, sp
addi sp, sp, -8
-#if CHIP_HAS_CBOX_HOME_MAP()
/*
* On TILEPro, we need to flush all tiles' caches, since we may
* have been doing hash-for-home caching there. Note that we
@@ -114,15 +104,14 @@ STD_ENTRY(relocate_new_kernel)
}
{
move r8, zero /* asids */
- moveli r20, lo16(___hv_flush_remote)
+ moveli r20, lo16(hv_flush_remote)
}
{
move r9, zero /* asidcount */
- auli r20, r20, ha16(___hv_flush_remote)
+ auli r20, r20, ha16(hv_flush_remote)
}
jalr r20
-#endif
/* r33 is destination pointer, default to zero */
@@ -175,8 +164,8 @@ STD_ENTRY(relocate_new_kernel)
move r0, r32
moveli r1, 0 /* arg to hv_reexec is 64 bits */
- moveli r41, lo16(___hv_reexec)
- auli r41, r41, ha16(___hv_reexec)
+ moveli r41, lo16(hv_reexec)
+ auli r41, r41, ha16(hv_reexec)
jalr r41
@@ -267,8 +256,8 @@ STD_ENTRY(relocate_new_kernel)
moveli r0, '\n'
jalr r40
.Lhalt:
- moveli r41, lo16(___hv_halt)
- auli r41, r41, ha16(___hv_halt)
+ moveli r41, lo16(hv_halt)
+ auli r41, r41, ha16(hv_halt)
jalr r41
STD_ENDPROC(relocate_new_kernel)
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
new file mode 100644
index 00000000000..d9d8cf6176e
--- /dev/null
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -0,0 +1,263 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * copy new kernel into place and then call hv_reexec
+ *
+ */
+
+#include <linux/linkage.h>
+#include <arch/chip.h>
+#include <asm/page.h>
+#include <hv/hypervisor.h>
+
+#undef RELOCATE_NEW_KERNEL_VERBOSE
+
+STD_ENTRY(relocate_new_kernel)
+
+ move r30, r0 /* page list */
+ move r31, r1 /* address of page we are on */
+ move r32, r2 /* start address of new kernel */
+
+ shrui r1, r1, PAGE_SHIFT
+ addi r1, r1, 1
+ shli sp, r1, PAGE_SHIFT
+ addi sp, sp, -8
+ /* we now have a stack (whether we need one or not) */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r40, hw2_last(hv_console_putc)
+ shl16insli r40, r40, hw1(hv_console_putc)
+ shl16insli r40, r40, hw0(hv_console_putc)
+
+ moveli r0, 'r'
+ jalr r40
+
+ moveli r0, '_'
+ jalr r40
+
+ moveli r0, 'n'
+ jalr r40
+
+ moveli r0, '_'
+ jalr r40
+
+ moveli r0, 'k'
+ jalr r40
+
+ moveli r0, '\n'
+ jalr r40
+#endif
+
+ /*
+ * Throughout this code r30 is pointer to the element of page
+ * list we are working on.
+ *
+ * Normally we get to the next element of the page list by
+ * incrementing r30 by eight. The exception is if the element
+ * on the page list is an IND_INDIRECTION in which case we use
+ * the element with the low bits masked off as the new value
+ * of r30.
+ *
+ * To get this started, we need the value passed to us (which
+ * will always be an IND_INDIRECTION) in memory somewhere with
+ * r30 pointing at it. To do that, we push the value passed
+ * to us on the stack and make r30 point to it.
+ */
+
+ st sp, r30
+ move r30, sp
+ addi sp, sp, -16
+
+ /*
+ * On TILE-GX, we need to flush all tiles' caches, since we may
+ * have been doing hash-for-home caching there. Note that we
+ * must do this _after_ we're completely done modifying any memory
+ * other than our output buffer (which we know is locally cached).
+ * We want the caches to be fully clean when we do the reexec,
+ * because the hypervisor is going to do this flush again at that
+ * point, and we don't want that second flush to overwrite any memory.
+ */
+ {
+ move r0, zero /* cache_pa */
+ moveli r1, hw2_last(HV_FLUSH_EVICT_L2)
+ }
+ {
+ shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
+ movei r2, -1 /* cache_cpumask; -1 means all client tiles */
+ }
+ {
+ shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) /* cache_control */
+ move r3, zero /* tlb_va */
+ }
+ {
+ move r4, zero /* tlb_length */
+ move r5, zero /* tlb_pgsize */
+ }
+ {
+ move r6, zero /* tlb_cpumask */
+ move r7, zero /* asids */
+ }
+ {
+ moveli r20, hw2_last(hv_flush_remote)
+ move r8, zero /* asidcount */
+ }
+ shl16insli r20, r20, hw1(hv_flush_remote)
+ shl16insli r20, r20, hw0(hv_flush_remote)
+
+ jalr r20
+
+ /* r33 is destination pointer, default to zero */
+
+ moveli r33, 0
+
+.Lloop: ld r10, r30
+
+ andi r9, r10, 0xf /* low 4 bits tell us what type it is */
+ xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */
+
+ cmpeqi r0, r9, 0x1 /* IND_DESTINATION */
+ beqzt r0, .Ltry2
+
+ move r33, r10
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'd'
+ jalr r40
+#endif
+
+ addi r30, r30, 8
+ j .Lloop
+
+.Ltry2:
+ cmpeqi r0, r9, 0x2 /* IND_INDIRECTION */
+ beqzt r0, .Ltry4
+
+ move r30, r10
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'i'
+ jalr r40
+#endif
+
+ j .Lloop
+
+.Ltry4:
+ cmpeqi r0, r9, 0x4 /* IND_DONE */
+ beqzt r0, .Ltry8
+
+ mf
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'D'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+#endif
+
+ move r0, r32
+
+ moveli r41, hw2_last(hv_reexec)
+ shl16insli r41, r41, hw1(hv_reexec)
+ shl16insli r41, r41, hw0(hv_reexec)
+
+ jalr r41
+
+ /* we should not get here */
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, '?'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+#endif
+
+ j .Lhalt
+
+.Ltry8: cmpeqi r0, r9, 0x8 /* IND_SOURCE */
+ beqz r0, .Lerr /* unknown type */
+
+ /* copy page at r10 to page at r33 */
+
+ move r11, r33
+
+ moveli r0, hw2_last(PAGE_SIZE)
+ shl16insli r0, r0, hw1(PAGE_SIZE)
+ shl16insli r0, r0, hw0(PAGE_SIZE)
+ add r33, r33, r0
+
+ /* copy word at r10 to word at r11 until r11 equals r33 */
+
+ /* We know page size must be multiple of 8, so we can unroll
+ * 8 times safely without any edge case checking.
+ *
+ * Issue a flush of the destination every 8 words to avoid
+ * incoherence when starting the new kernel. (Now this is
+ * just good paranoia because the hv_reexec call will also
+ * take care of this.)
+ */
+
+1:
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0; addi r11, r11, 8 }
+ { ld r0, r10; addi r10, r10, 8 }
+ { st r11, r0 }
+ { flush r11 ; addi r11, r11, 8 }
+
+ cmpeq r0, r33, r11
+ beqzt r0, 1b
+
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 's'
+ jalr r40
+#endif
+
+ addi r30, r30, 8
+ j .Lloop
+
+
+.Lerr:
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'e'
+ jalr r40
+ moveli r0, 'r'
+ jalr r40
+ moveli r0, 'r'
+ jalr r40
+ moveli r0, '\n'
+ jalr r40
+#endif
+.Lhalt:
+ moveli r41, hw2_last(hv_halt)
+ shl16insli r41, r41, hw1(hv_halt)
+ shl16insli r41, r41, hw0(hv_halt)
+
+ jalr r41
+ STD_ENDPROC(relocate_new_kernel)
+
+ .section .rodata,"a"
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long .Lend_relocate_new_kernel - relocate_new_kernel
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 5f85d8b34db..112ababa9e5 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -23,11 +23,15 @@
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/pci.h>
+#include <linux/swiotlb.h>
#include <linux/initrd.h>
#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/timex.h>
+#include <linux/hugetlb.h>
+#include <linux/start_kernel.h>
+#include <linux/screen_info.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
@@ -46,24 +50,41 @@ static inline int ABS(int x) { return x >= 0 ? x : -x; }
/* Chip information */
char chip_model[64] __write_once;
+#ifdef CONFIG_VT
+struct screen_info screen_info;
+#endif
+
struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-/* We only create bootmem data on node 0. */
-static bootmem_data_t __initdata node0_bdata;
-
/* Information on the NUMA nodes that we compute early */
-unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
-unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
+unsigned long node_start_pfn[MAX_NUMNODES];
+unsigned long node_end_pfn[MAX_NUMNODES];
unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
unsigned long __initdata node_free_pfn[MAX_NUMNODES];
static unsigned long __initdata node_percpu[MAX_NUMNODES];
+/*
+ * per-CPU stack and boot info.
+ */
+DEFINE_PER_CPU(unsigned long, boot_sp) =
+ (unsigned long)init_stack + THREAD_SIZE;
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
+#else
+/*
+ * The variable must be __initdata since it references __init code.
+ * With CONFIG_SMP it is per-cpu data, which is exempt from validation.
+ */
+unsigned long __initdata boot_pc = (unsigned long)start_kernel;
+#endif
+
#ifdef CONFIG_HIGHMEM
/* Page frame index of end of lowmem on each controller. */
-unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
+unsigned long node_lowmem_end_pfn[MAX_NUMNODES];
/* Number of pages that can be mapped into lowmem. */
static unsigned long __initdata mappable_physpages;
@@ -94,7 +115,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
};
static nodemask_t __initdata isolnodes;
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
enum { DEFAULT_PCI_RESERVE_MB = 64 };
static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
unsigned long __initdata pci_reserve_start_pfn = -1U;
@@ -103,13 +124,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U;
static int __init setup_maxmem(char *str)
{
- long maxmem_mb;
- if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 ||
- maxmem_mb == 0)
+ unsigned long long maxmem;
+ if (str == NULL || (maxmem = memparse(str, NULL)) == 0)
return -EINVAL;
- maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) <<
- (HPAGE_SHIFT - PAGE_SHIFT);
+ maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT);
pr_info("Forcing RAM used to no more than %dMB\n",
maxmem_pfn >> (20 - PAGE_SHIFT));
return 0;
@@ -119,14 +138,15 @@ early_param("maxmem", setup_maxmem);
static int __init setup_maxnodemem(char *str)
{
char *endp;
- long maxnodemem_mb, node;
+ unsigned long long maxnodemem;
+ long node;
node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
- if (node >= MAX_NUMNODES || *endp != ':' ||
- strict_strtol(endp+1, 0, &maxnodemem_mb) != 0)
+ if (node >= MAX_NUMNODES || *endp != ':')
return -EINVAL;
- maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) <<
+ maxnodemem = memparse(endp+1, NULL);
+ maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) <<
(HPAGE_SHIFT - PAGE_SHIFT);
pr_info("Forcing RAM used on node %ld to no more than %dMB\n",
node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
@@ -134,6 +154,65 @@ static int __init setup_maxnodemem(char *str)
}
early_param("maxnodemem", setup_maxnodemem);
+struct memmap_entry {
+ u64 addr; /* start of memory segment */
+ u64 size; /* size of memory segment */
+};
+static struct memmap_entry memmap_map[64];
+static int memmap_nr;
+
+static void add_memmap_region(u64 addr, u64 size)
+{
+ if (memmap_nr >= ARRAY_SIZE(memmap_map)) {
+ pr_err("Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+ memmap_map[memmap_nr].addr = addr;
+ memmap_map[memmap_nr].size = size;
+ memmap_nr++;
+}
+
+static int __init setup_memmap(char *p)
+{
+ char *oldp;
+ u64 start_at, mem_size;
+
+ if (!p)
+ return -EINVAL;
+
+ if (!strncmp(p, "exactmap", 8)) {
+ pr_err("\"memmap=exactmap\" not valid on tile\n");
+ return 0;
+ }
+
+ oldp = p;
+ mem_size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ if (*p == '@') {
+ pr_err("\"memmap=nn@ss\" (force RAM) invalid on tile\n");
+ } else if (*p == '#') {
+ pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on tile\n");
+ } else if (*p == '$') {
+ start_at = memparse(p+1, &p);
+ add_memmap_region(start_at, mem_size);
+ } else {
+ if (mem_size == 0)
+ return -EINVAL;
+ maxmem_pfn = (mem_size >> HPAGE_SHIFT) <<
+ (HPAGE_SHIFT - PAGE_SHIFT);
+ }
+ return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", setup_memmap);
+
+static int __init setup_mem(char *str)
+{
+ return setup_maxmem(str);
+}
+early_param("mem", setup_mem); /* compatibility with x86 */
+
static int __init setup_isolnodes(char *str)
{
char buf[MAX_NUMNODES * 5];
@@ -146,18 +225,15 @@ static int __init setup_isolnodes(char *str)
}
early_param("isolnodes", setup_isolnodes);
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
static int __init setup_pci_reserve(char* str)
{
- unsigned long mb;
-
- if (str == NULL || strict_strtoul(str, 0, &mb) != 0 ||
- mb > 3 * 1024)
+ if (str == NULL || kstrtouint(str, 0, &pci_reserve_mb) != 0 ||
+ pci_reserve_mb > 3 * 1024)
return -EINVAL;
- pci_reserve_mb = mb;
pr_info("Reserving %dMB for PCIE root complex mappings\n",
- pci_reserve_mb);
+ pci_reserve_mb);
return 0;
}
early_param("pci_reserve", setup_pci_reserve);
@@ -189,7 +265,7 @@ early_param("vmalloc", parse_vmalloc);
/*
* Determine for each controller where its lowmem is mapped and how much of
* it is mapped there. On controller zero, the first few megabytes are
- * already mapped in as code at MEM_SV_INTRPT, so in principle we could
+ * already mapped in as code at MEM_SV_START, so in principle we could
* start our data mappings higher up, but for now we don't bother, to avoid
* additional confusion.
*
@@ -270,7 +346,7 @@ static void *__init setup_pa_va_mapping(void)
* This is up to 4 mappings for lowmem, one mapping per memory
* controller, plus one for our text segment.
*/
-static void __cpuinit store_permanent_mappings(void)
+static void store_permanent_mappings(void)
{
int i;
@@ -287,8 +363,8 @@ static void __cpuinit store_permanent_mappings(void)
hv_store_mapping(addr, pages << PAGE_SHIFT, pa);
}
- hv_store_mapping((HV_VirtAddr)_stext,
- (uint32_t)(_einittext - _stext), 0);
+ hv_store_mapping((HV_VirtAddr)_text,
+ (uint32_t)(_einittext - _text), 0);
}
/*
@@ -309,6 +385,7 @@ static void __init setup_memory(void)
#if defined(CONFIG_HIGHMEM) || defined(__tilegx__)
long lowmem_pages;
#endif
+ unsigned long physpages = 0;
/* We are using a char to hold the cpu_2_node[] mapping */
BUILD_BUG_ON(MAX_NUMNODES > 127);
@@ -368,8 +445,8 @@ static void __init setup_memory(void)
continue;
}
}
- if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) {
- int max_size = maxmem_pfn - num_physpages;
+ if (physpages + PFN_DOWN(range.size) > maxmem_pfn) {
+ int max_size = maxmem_pfn - physpages;
if (max_size > 0) {
pr_err("Maxmem reduced node %d to %d pages\n",
i, max_size);
@@ -397,7 +474,7 @@ static void __init setup_memory(void)
continue;
}
#endif
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
* Blocks that overlap the pci reserved region must
* have enough space to hold the maximum percpu data
@@ -426,7 +503,7 @@ static void __init setup_memory(void)
node_start_pfn[i] = start;
node_end_pfn[i] = end;
node_controller[i] = range.controller;
- num_physpages += size;
+ physpages += size;
max_pfn = end;
/* Mark node as online */
@@ -445,7 +522,7 @@ static void __init setup_memory(void)
* we're willing to use at 8 million pages (32GB of 4KB pages).
*/
cap = 8 * 1024 * 1024; /* 8 million pages */
- if (num_physpages > cap) {
+ if (physpages > cap) {
int num_nodes = num_online_nodes();
int cap_each = cap / num_nodes;
unsigned long dropped_pages = 0;
@@ -456,10 +533,10 @@ static void __init setup_memory(void)
node_end_pfn[i] = node_start_pfn[i] + cap_each;
}
}
- num_physpages -= dropped_pages;
+ physpages -= dropped_pages;
pr_warning("Only using %ldMB memory;"
" ignoring %ldMB.\n",
- num_physpages >> (20 - PAGE_SHIFT),
+ physpages >> (20 - PAGE_SHIFT),
dropped_pages >> (20 - PAGE_SHIFT));
pr_warning("Consider using a larger page size.\n");
}
@@ -477,7 +554,7 @@ static void __init setup_memory(void)
lowmem_pages = (mappable_physpages > MAXMEM_PFN) ?
MAXMEM_PFN : mappable_physpages;
- highmem_pages = (long) (num_physpages - lowmem_pages);
+ highmem_pages = (long) (physpages - lowmem_pages);
pr_notice("%ldMB HIGHMEM available.\n",
pages_to_mb(highmem_pages > 0 ? highmem_pages : 0));
@@ -494,7 +571,6 @@ static void __init setup_memory(void)
pr_warning("Use a HIGHMEM enabled kernel.\n");
max_low_pfn = MAXMEM_PFN;
max_pfn = MAXMEM_PFN;
- num_physpages = MAXMEM_PFN;
node_end_pfn[0] = MAXMEM_PFN;
} else {
pr_notice("%ldMB memory available.\n",
@@ -519,41 +595,125 @@ static void __init setup_memory(void)
#endif
}
-static void __init setup_bootmem_allocator(void)
+/*
+ * On 32-bit machines, we only put bootmem on the low controller,
+ * since PAs > 4GB can't be used in bootmem. In principle one could
+ * imagine, e.g., multiple 1 GB controllers all of which could support
+ * bootmem, but in practice using controllers this small isn't a
+ * particularly interesting scenario, so we just keep it simple and
+ * use only the first controller for bootmem on 32-bit machines.
+ */
+static inline int node_has_bootmem(int nid)
+{
+#ifdef CONFIG_64BIT
+ return 1;
+#else
+ return nid == 0;
+#endif
+}
+
+static inline unsigned long alloc_bootmem_pfn(int nid,
+ unsigned long size,
+ unsigned long goal)
+{
+ void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
+ PAGE_SIZE, goal);
+ unsigned long pfn = kaddr_to_pfn(kva);
+ BUG_ON(goal && PFN_PHYS(pfn) != goal);
+ return pfn;
+}
+
+static void __init setup_bootmem_allocator_node(int i)
{
- unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn;
+ unsigned long start, end, mapsize, mapstart;
+
+ if (node_has_bootmem(i)) {
+ NODE_DATA(i)->bdata = &bootmem_node_data[i];
+ } else {
+ /* Share controller zero's bdata for now. */
+ NODE_DATA(i)->bdata = &bootmem_node_data[0];
+ return;
+ }
- /* Provide a node 0 bdata. */
- NODE_DATA(0)->bdata = &node0_bdata;
+ /* Skip up to after the bss in node 0. */
+ start = (i == 0) ? min_low_pfn : node_start_pfn[i];
-#ifdef CONFIG_PCI
- /* Don't let boot memory alias the PCI region. */
- last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn);
+ /* Only lowmem, if we're a HIGHMEM build. */
+#ifdef CONFIG_HIGHMEM
+ end = node_lowmem_end_pfn[i];
#else
- last_alloc_pfn = max_low_pfn;
+ end = node_end_pfn[i];
#endif
- /*
- * Initialize the boot-time allocator (with low memory only):
- * The first argument says where to put the bitmap, and the
- * second says where the end of allocatable memory is.
- */
- bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn);
+ /* No memory here. */
+ if (end == start)
+ return;
+
+ /* Figure out where the bootmem bitmap is located. */
+ mapsize = bootmem_bootmap_pages(end - start);
+ if (i == 0) {
+ /* Use some space right before the heap on node 0. */
+ mapstart = start;
+ start += mapsize;
+ } else {
+ /* Allocate bitmap on node 0 to avoid page table issues. */
+ mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
+ }
+
+ /* Initialize a node. */
+ init_bootmem_node(NODE_DATA(i), mapstart, start, end);
+ /* Free all the space back into the allocator. */
+ free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
+
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
- * Let the bootmem allocator use all the space we've given it
- * except for its own bitmap.
+ * Throw away any memory aliased by the PCI region.
*/
- first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size);
- if (first_alloc_pfn >= last_alloc_pfn)
- early_panic("Not enough memory on controller 0 for bootmem\n");
+ if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) {
+ start = max(pci_reserve_start_pfn, start);
+ end = min(pci_reserve_end_pfn, end);
+ reserve_bootmem(PFN_PHYS(start), PFN_PHYS(end - start),
+ BOOTMEM_EXCLUSIVE);
+ }
+#endif
+}
+
+static void __init setup_bootmem_allocator(void)
+{
+ int i;
+ for (i = 0; i < MAX_NUMNODES; ++i)
+ setup_bootmem_allocator_node(i);
+
+ /* Reserve any memory excluded by "memmap" arguments. */
+ for (i = 0; i < memmap_nr; ++i) {
+ struct memmap_entry *m = &memmap_map[i];
+ reserve_bootmem(m->addr, m->size, BOOTMEM_DEFAULT);
+ }
- free_bootmem(PFN_PHYS(first_alloc_pfn),
- PFN_PHYS(last_alloc_pfn - first_alloc_pfn));
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start) {
+ /* Make sure the initrd memory region is not modified. */
+ if (reserve_bootmem(initrd_start, initrd_end - initrd_start,
+ BOOTMEM_EXCLUSIVE)) {
+ pr_crit("The initrd memory region has been polluted. Disabling it.\n");
+ initrd_start = 0;
+ initrd_end = 0;
+ } else {
+ /*
+ * Translate initrd_start & initrd_end from PA to VA for
+ * future access.
+ */
+ initrd_start += PAGE_OFFSET;
+ initrd_end += PAGE_OFFSET;
+ }
+ }
+#endif
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
- reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0);
+ reserve_bootmem(crashk_res.start, resource_size(&crashk_res),
+ BOOTMEM_DEFAULT);
#endif
}
@@ -580,19 +740,13 @@ static int __init percpu_size(void)
return size;
}
-static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
-{
- void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
- unsigned long pfn = kaddr_to_pfn(kva);
- BUG_ON(goal && PFN_PHYS(pfn) != goal);
- return pfn;
-}
-
static void __init zone_sizes_init(void)
{
unsigned long zones_size[MAX_NR_ZONES] = { 0 };
int size = percpu_size();
int num_cpus = smp_height * smp_width;
+ const unsigned long dma_end = (1UL << (32 - PAGE_SHIFT));
+
int i;
for (i = 0; i < num_cpus; ++i)
@@ -625,21 +779,22 @@ static void __init zone_sizes_init(void)
* though, there'll be no lowmem, so we just alloc_bootmem
* the memmap. There will be no percpu memory either.
*/
- if (__pfn_to_highbits(start) == 0) {
- /* In low PAs, allocate via bootmem. */
+ if (i != 0 && cpu_isset(i, isolnodes)) {
+ node_memmap_pfn[i] =
+ alloc_bootmem_pfn(0, memmap_size, 0);
+ BUG_ON(node_percpu[i] != 0);
+ } else if (node_has_bootmem(start)) {
unsigned long goal = 0;
node_memmap_pfn[i] =
- alloc_bootmem_pfn(memmap_size, goal);
+ alloc_bootmem_pfn(i, memmap_size, 0);
if (kdata_huge)
goal = PFN_PHYS(lowmem_end) - node_percpu[i];
if (node_percpu[i])
node_percpu_pfn[i] =
- alloc_bootmem_pfn(node_percpu[i], goal);
- } else if (cpu_isset(i, isolnodes)) {
- node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
- BUG_ON(node_percpu[i] != 0);
+ alloc_bootmem_pfn(i, node_percpu[i],
+ goal);
} else {
- /* In high PAs, just reserve some pages. */
+ /* In non-bootmem zones, just reserve some pages. */
node_memmap_pfn[i] = node_free_pfn[i];
node_free_pfn[i] += PFN_UP(memmap_size);
if (!kdata_huge) {
@@ -663,23 +818,24 @@ static void __init zone_sizes_init(void)
zones_size[ZONE_NORMAL] = end - start;
#endif
- /*
- * Everyone shares node 0's bootmem allocator, but
- * we use alloc_remap(), above, to put the actual
- * struct page array on the individual controllers,
- * which is most of the data that we actually care about.
- * We can't place bootmem allocators on the other
- * controllers since the bootmem allocator can only
- * operate on 32-bit physical addresses.
- */
- NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+ if (start < dma_end) {
+ zones_size[ZONE_DMA] = min(zones_size[ZONE_NORMAL],
+ dma_end - start);
+ zones_size[ZONE_NORMAL] -= zones_size[ZONE_DMA];
+ } else {
+ zones_size[ZONE_DMA] = 0;
+ }
+
+ /* Take zone metadata from controller 0 if we're isolnode. */
+ if (node_isset(i, isolnodes))
+ NODE_DATA(i)->bdata = &bootmem_node_data[0];
free_area_init_node(i, zones_size, start, NULL);
printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n",
PFN_UP(node_percpu[i]));
/* Track the type of memory on each node */
- if (zones_size[ZONE_NORMAL])
+ if (zones_size[ZONE_NORMAL] || zones_size[ZONE_DMA])
node_set_state(i, N_NORMAL_MEMORY);
#ifdef CONFIG_HIGHMEM
if (end != start)
@@ -855,13 +1011,29 @@ subsys_initcall(topology_init);
#endif /* CONFIG_NUMA */
+/*
+ * Initialize hugepage support on this cpu. We do this on all cores
+ * early in boot: before argument parsing for the boot cpu, and after
+ * argument parsing but before the init functions run on the secondaries.
+ * So the values we set up here in the hypervisor may be overridden on
+ * the boot cpu as arguments are parsed.
+ */
+static void init_super_pages(void)
+{
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+ int i;
+ for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
+ hv_set_pte_super_shift(i, huge_shift[i]);
+#endif
+}
+
/**
* setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
* @boot: Is this the boot cpu?
*
* Called from setup_arch() on the boot cpu, or online_secondary().
*/
-void __cpuinit setup_cpu(int boot)
+void setup_cpu(int boot)
{
/* The boot cpu sets up its permanent mappings much earlier. */
if (!boot)
@@ -872,9 +1044,6 @@ void __cpuinit setup_cpu(int boot)
arch_local_irq_unmask(INT_DMATLB_MISS);
arch_local_irq_unmask(INT_DMATLB_ACCESS);
#endif
-#if CHIP_HAS_SN_PROC()
- arch_local_irq_unmask(INT_SNITLB_MISS);
-#endif
#ifdef __tilegx__
arch_local_irq_unmask(INT_SINGLE_STEP_K);
#endif
@@ -889,10 +1058,6 @@ void __cpuinit setup_cpu(int boot)
/* Static network is not restricted. */
__insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
#endif
-#if CHIP_HAS_SN_PROC()
- __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
- __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
-#endif
/*
* Set the MPL for interrupt control 0 & 1 to the corresponding
@@ -909,12 +1074,14 @@ void __cpuinit setup_cpu(int boot)
/* Reset the network state on this cpu. */
reset_network_state();
#endif
+
+ init_super_pages();
}
#ifdef CONFIG_BLK_DEV_INITRD
static int __initdata set_initramfs_file;
-static char __initdata initramfs_file[128] = "initramfs.cpio.gz";
+static char __initdata initramfs_file[128] = "initramfs";
static int __init setup_initramfs_file(char *str)
{
@@ -928,9 +1095,9 @@ static int __init setup_initramfs_file(char *str)
early_param("initramfs_file", setup_initramfs_file);
/*
- * We look for an additional "initramfs.cpio.gz" file in the hvfs.
- * If there is one, we allocate some memory for it and it will be
- * unpacked to the initramfs after any built-in initramfs_data.
+ * We look for a file called "initramfs" in the hvfs. If there is one, we
+ * allocate some memory for it and it will be unpacked to the initramfs.
+ * If it's compressed, the initd code will uncompress it first.
*/
static void __init load_hv_initrd(void)
{
@@ -938,12 +1105,22 @@ static void __init load_hv_initrd(void)
int fd, rc;
void *initrd;
+ /* If initrd has already been set, skip initramfs file in hvfs. */
+ if (initrd_start)
+ return;
+
fd = hv_fs_findfile((HV_VirtAddr) initramfs_file);
if (fd == HV_ENOENT) {
- if (set_initramfs_file)
+ if (set_initramfs_file) {
pr_warning("No such hvfs initramfs file '%s'\n",
initramfs_file);
- return;
+ return;
+ } else {
+ /* Try old backwards-compatible name. */
+ fd = hv_fs_findfile((HV_VirtAddr)"initramfs.cpio.gz");
+ if (fd == HV_ENOENT)
+ return;
+ }
}
BUG_ON(fd < 0);
stat = hv_fs_fstat(fd);
@@ -970,6 +1147,25 @@ void __init free_initrd_mem(unsigned long begin, unsigned long end)
free_bootmem(__pa(begin), end - begin);
}
+static int __init setup_initrd(char *str)
+{
+ char *endp;
+ unsigned long initrd_size;
+
+ initrd_size = str ? simple_strtoul(str, &endp, 0) : 0;
+ if (initrd_size == 0 || *endp != '@')
+ return -EINVAL;
+
+ initrd_start = simple_strtoul(endp+1, &endp, 0);
+ if (initrd_start == 0)
+ return -EINVAL;
+
+ initrd_end = initrd_start + initrd_size;
+
+ return 0;
+}
+early_param("initrd", setup_initrd);
+
#else
static inline void load_hv_initrd(void) {}
#endif /* CONFIG_BLK_DEV_INITRD */
@@ -1037,7 +1233,7 @@ static void __init validate_va(void)
#ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */
/*
* Similarly, make sure we're only using allowed VAs.
- * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT,
+ * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_START,
* and 0 .. KERNEL_HIGH_VADDR.
* In addition, make sure we CAN'T use the end of memory, since
* we use the last chunk of each pgd for the pgd_list.
@@ -1052,7 +1248,7 @@ static void __init validate_va(void)
if (range.size == 0)
break;
if (range.start <= MEM_USER_INTRPT &&
- range.start + range.size >= MEM_HV_INTRPT)
+ range.start + range.size >= MEM_HV_START)
user_kernel_ok = 1;
if (range.start == 0)
max_va = range.size;
@@ -1070,8 +1266,7 @@ static void __init validate_va(void)
if ((long)VMALLOC_START >= 0)
early_panic(
"Linux VMALLOC region below the 2GB line (%#lx)!\n"
- "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n"
- "or smaller VMALLOC_RESERVE.\n",
+ "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n",
VMALLOC_START);
#endif
}
@@ -1086,7 +1281,6 @@ static void __init validate_va(void)
struct cpumask __write_once cpu_lotar_map;
EXPORT_SYMBOL(cpu_lotar_map);
-#if CHIP_HAS_CBOX_HOME_MAP()
/*
* hash_for_home_map lists all the tiles that hash-for-home data
* will be cached on. Note that this may includes tiles that are not
@@ -1096,11 +1290,10 @@ EXPORT_SYMBOL(cpu_lotar_map);
*/
struct cpumask hash_for_home_map;
EXPORT_SYMBOL(hash_for_home_map);
-#endif
/*
* cpu_cacheable_map lists all the cpus whose caches the hypervisor can
- * flush on our behalf. It is set to cpu_possible_map OR'ed with
+ * flush on our behalf. It is set to cpu_possible_mask OR'ed with
* hash_for_home_map, and it is what should be passed to
* hv_flush_remote() to flush all caches. Note that if there are
* dedicated hypervisor driver tiles that have authorized use of their
@@ -1186,20 +1379,16 @@ static void __init setup_cpu_maps(void)
sizeof(cpu_lotar_map));
if (rc < 0) {
pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n");
- cpu_lotar_map = cpu_possible_map;
+ cpu_lotar_map = *cpu_possible_mask;
}
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Retrieve set of CPUs used for hash-for-home caching */
rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
(HV_VirtAddr) hash_for_home_map.bits,
sizeof(hash_for_home_map));
if (rc < 0)
early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
- cpumask_or(&cpu_cacheable_map, &cpu_possible_map, &hash_for_home_map);
-#else
- cpu_cacheable_map = cpu_possible_map;
-#endif
+ cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
}
@@ -1259,7 +1448,7 @@ void __init setup_arch(char **cmdline_p)
setup_cpu_maps();
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
/*
* Initialize the PCI structures. This is done before memory
* setup so that we know whether or not a pci_reserve region
@@ -1288,6 +1477,10 @@ void __init setup_arch(char **cmdline_p)
* any memory using the bootmem allocator.
*/
+#ifdef CONFIG_SWIOTLB
+ swiotlb_init(0);
+#endif
+
paging_init();
setup_numa_mapping();
zone_sizes_init();
@@ -1390,26 +1583,26 @@ void __init setup_per_cpu_areas(void)
for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) {
/* Update the vmalloc mapping and page home. */
- pte_t *ptep =
- virt_to_pte(NULL, (unsigned long)ptr + i);
+ unsigned long addr = (unsigned long)ptr + i;
+ pte_t *ptep = virt_to_kpte(addr);
pte_t pte = *ptep;
BUG_ON(pfn != pte_pfn(pte));
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
pte = set_remote_cache_cpu(pte, cpu);
- set_pte(ptep, pte);
+ set_pte_at(&init_mm, addr, ptep, pte);
/* Update the lowmem mapping for consistency. */
lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
- ptep = virt_to_pte(NULL, lowmem_va);
+ ptep = virt_to_kpte(lowmem_va);
if (pte_huge(*ptep)) {
printk(KERN_DEBUG "early shatter of huge page"
" at %#lx\n", lowmem_va);
shatter_pmd((pmd_t *)ptep);
- ptep = virt_to_pte(NULL, lowmem_va);
+ ptep = virt_to_kpte(lowmem_va);
BUG_ON(pte_huge(*ptep));
}
BUG_ON(pfn != pte_pfn(*ptep));
- set_pte(ptep, pte);
+ set_pte_at(&init_mm, lowmem_va, ptep, pte);
}
}
@@ -1438,16 +1631,17 @@ static struct resource code_resource = {
};
/*
- * We reserve all resources above 4GB so that PCI won't try to put
- * mappings above 4GB; the standard allows that for some devices but
- * the probing code trunates values to 32 bits.
+ * On Pro, we reserve all resources above 4GB so that PCI won't try to put
+ * mappings above 4GB.
*/
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
static struct resource* __init
insert_non_bus_resource(void)
{
struct resource *res =
kzalloc(sizeof(struct resource), GFP_ATOMIC);
+ if (!res)
+ return NULL;
res->name = "Non-Bus Physical Address Space";
res->start = (1ULL << 32);
res->end = -1LL;
@@ -1461,11 +1655,13 @@ insert_non_bus_resource(void)
#endif
static struct resource* __init
-insert_ram_resource(u64 start_pfn, u64 end_pfn)
+insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved)
{
struct resource *res =
kzalloc(sizeof(struct resource), GFP_ATOMIC);
- res->name = "System RAM";
+ if (!res)
+ return NULL;
+ res->name = reserved ? "Reserved" : "System RAM";
res->start = start_pfn << PAGE_SHIFT;
res->end = (end_pfn << PAGE_SHIFT) - 1;
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
@@ -1485,10 +1681,9 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn)
static int __init request_standard_resources(void)
{
int i;
- enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+ enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
- iomem_resource.end = -1LL;
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
insert_non_bus_resource();
#endif
@@ -1496,16 +1691,16 @@ static int __init request_standard_resources(void)
u64 start_pfn = node_start_pfn[i];
u64 end_pfn = node_end_pfn[i];
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && !defined(__tilegx__)
if (start_pfn <= pci_reserve_start_pfn &&
end_pfn > pci_reserve_start_pfn) {
if (end_pfn > pci_reserve_end_pfn)
insert_ram_resource(pci_reserve_end_pfn,
- end_pfn);
+ end_pfn, 0);
end_pfn = pci_reserve_start_pfn;
}
#endif
- insert_ram_resource(start_pfn, end_pfn);
+ insert_ram_resource(start_pfn, end_pfn, 0);
}
code_resource.start = __pa(_text - CODE_DELTA);
@@ -1516,6 +1711,13 @@ static int __init request_standard_resources(void)
insert_resource(&iomem_resource, &code_resource);
insert_resource(&iomem_resource, &data_resource);
+ /* Mark any "memmap" regions busy for the resource manager. */
+ for (i = 0; i < memmap_nr; ++i) {
+ struct memmap_entry *m = &memmap_map[i];
+ insert_ram_resource(PFN_DOWN(m->addr),
+ PFN_UP(m->addr + m->size - 1), 1);
+ }
+
#ifdef CONFIG_KEXEC
insert_resource(&iomem_resource, &crashk_res);
#endif
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index f79d4b88c74..d1d026f0126 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -33,19 +33,11 @@
#include <asm/ucontext.h>
#include <asm/sigframe.h>
#include <asm/syscalls.h>
+#include <asm/vdso.h>
#include <arch/interrupts.h>
#define DEBUG_SIG 0
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss,
- stack_t __user *, uoss, struct pt_regs *, regs)
-{
- return do_sigaltstack(uss, uoss, regs->sp);
-}
-
-
/*
* Do a signal return; undo the signal stack.
*/
@@ -85,8 +77,9 @@ void signal_fault(const char *type, struct pt_regs *regs,
}
/* The assembly shim for this function arranges to ignore the return value. */
-SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
+SYSCALL_DEFINE0(rt_sigreturn)
{
+ struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame =
(struct rt_sigframe __user *)(regs->sp);
sigset_t set;
@@ -96,13 +89,12 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+ if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return 0;
@@ -193,17 +185,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __clear_user(&frame->save_area, sizeof(frame->save_area));
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(NULL, &frame->uc.uc_link);
- err |= __put_user((void __user *)(current->sas_ss_sp),
- &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
err |= setup_sigcontext(&frame->uc.uc_mcontext, regs);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
- restorer = VDSO_BASE;
+ restorer = VDSO_SYM(&__vdso_rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ka->sa.sa_restorer;
@@ -222,15 +210,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->regs[1] = (unsigned long) &frame->info;
regs->regs[2] = (unsigned long) &frame->uc;
regs->flags |= PT_FLAGS_CALLER_SAVES;
-
- /*
- * Notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -242,10 +221,11 @@ give_sigsegv:
* OK, we're invoking a handler
*/
-static int handle_signal(unsigned long sig, siginfo_t *info,
- struct k_sigaction *ka, sigset_t *oldset,
+static void handle_signal(unsigned long sig, siginfo_t *info,
+ struct k_sigaction *ka,
struct pt_regs *regs)
{
+ sigset_t *oldset = sigmask_to_save();
int ret;
/* Are we from a system call? */
@@ -278,15 +258,10 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
else
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
- if (ret == 0) {
- /* This code is only called from system calls or from
- * the work_pending path in the return-to-user code, and
- * either way we can re-enable interrupts unconditionally.
- */
- block_sigmask(ka, sig);
- }
-
- return ret;
+ if (ret)
+ return;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
/*
@@ -299,7 +274,6 @@ void do_signal(struct pt_regs *regs)
siginfo_t info;
int signr;
struct k_sigaction ka;
- sigset_t *oldset;
/*
* i386 will check if we're coming from kernel mode and bail out
@@ -308,24 +282,10 @@ void do_signal(struct pt_regs *regs)
* helpful, we can reinstate the check on "!user_mode(regs)".
*/
- if (current_thread_info()->status & TS_RESTORE_SIGMASK)
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
-
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Whee! Actually deliver the signal. */
- if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
- /*
- * A signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TS_RESTORE_SIGMASK flag.
- */
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- }
-
+ handle_signal(signr, &info, &ka, regs);
goto done;
}
@@ -350,10 +310,7 @@ void do_signal(struct pt_regs *regs)
}
/* If there's no signal to deliver, just put the saved sigmask back. */
- if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
+ restore_saved_sigmask();
done:
/* Avoid double syscall restart if there are nested signals. */
@@ -364,14 +321,13 @@ int show_unhandled_signals = 1;
static int __init crashinfo(char *str)
{
- unsigned long val;
const char *word;
if (*str == '\0')
- val = 2;
- else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0)
+ show_unhandled_signals = 2;
+ else if (*str != '=' || kstrtoint(++str, 0, &show_unhandled_signals) != 0)
return 0;
- show_unhandled_signals = val;
+
switch (show_unhandled_signals) {
case 0:
word = "No";
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index b7a87950408..de07fa7d131 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -12,40 +12,30 @@
* more details.
*
* A code-rewriter that enables instruction single-stepping.
- * Derived from iLib's single-stepping code.
*/
-#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-
-/* These functions are only used on the TILE platform */
+#include <linux/smp.h>
+#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/prctl.h>
#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
#include <arch/abi.h>
+#include <arch/spr_def.h>
#include <arch/opcode.h>
-#define signExtend17(val) sign_extend((val), 17)
-#define TILE_X1_MASK (0xffffffffULL << 31)
-
-int unaligned_printk;
-static int __init setup_unaligned_printk(char *str)
-{
- long val;
- if (strict_strtol(str, 0, &val) != 0)
- return 0;
- unaligned_printk = val;
- pr_info("Printk for each unaligned data accesses is %s\n",
- unaligned_printk ? "enabled" : "disabled");
- return 1;
-}
-__setup("unaligned_printk=", setup_unaligned_printk);
+#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-unsigned int unaligned_fixup_count;
+#define signExtend17(val) sign_extend((val), 17)
+#define TILE_X1_MASK (0xffffffffULL << 31)
enum mem_op {
MEMOP_NONE,
@@ -55,12 +45,13 @@ enum mem_op {
MEMOP_STORE_POSTINCR
};
-static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
+static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n,
+ s32 offset)
{
- tile_bundle_bits result;
+ tilepro_bundle_bits result;
/* mask out the old offset */
- tile_bundle_bits mask = create_BrOff_X1(-1);
+ tilepro_bundle_bits mask = create_BrOff_X1(-1);
result = n & (~mask);
/* or in the new offset */
@@ -69,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
return result;
}
-static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest,
+ int src)
{
- tile_bundle_bits result;
- tile_bundle_bits op;
+ tilepro_bundle_bits result;
+ tilepro_bundle_bits op;
result = n & (~TILE_X1_MASK);
@@ -86,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
return result;
}
-static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n)
{
return move_X1(n, TREG_ZERO, TREG_ZERO);
}
-static inline tile_bundle_bits addi_X1(
- tile_bundle_bits n, int dest, int src, int imm)
+static inline tilepro_bundle_bits addi_X1(
+ tilepro_bundle_bits n, int dest, int src, int imm)
{
n &= ~TILE_X1_MASK;
@@ -106,15 +98,26 @@ static inline tile_bundle_bits addi_X1(
return n;
}
-static tile_bundle_bits rewrite_load_store_unaligned(
+static tilepro_bundle_bits rewrite_load_store_unaligned(
struct single_step_state *state,
- tile_bundle_bits bundle,
+ tilepro_bundle_bits bundle,
struct pt_regs *regs,
enum mem_op mem_op,
int size, int sign_ext)
{
unsigned char __user *addr;
int val_reg, addr_reg, err, val;
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
/* Get address and value registers */
if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
@@ -152,9 +155,25 @@ static tile_bundle_bits rewrite_load_store_unaligned(
if (((unsigned long)addr % size) == 0)
return bundle;
-#ifndef __LITTLE_ENDIAN
-# error We assume little-endian representation with copy_xx_user size 2 here
-#endif
+ /*
+ * Return SIGBUS with the unaligned address, if requested.
+ * Note that we return SIGBUS even for completely invalid addresses
+ * as long as they are in fact unaligned; this matches what the
+ * tilepro hardware would be doing, if it could provide us with the
+ * actual bad address in an SPR, which it doesn't.
+ */
+ if (align_ctl == 0) {
+ siginfo_t info = {
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
+ .si_addr = addr
+ };
+ trace_unhandled_signal("unaligned trap", regs,
+ (unsigned long)addr, SIGBUS);
+ force_sig_info(info.si_signo, &info, current);
+ return (tilepro_bundle_bits) 0;
+ }
+
/* Handle unaligned load/store */
if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
unsigned short val_16;
@@ -175,32 +194,31 @@ static tile_bundle_bits rewrite_load_store_unaligned(
state->update = 1;
}
} else {
+ unsigned short val_16;
val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
- err = copy_to_user(addr, &val, size);
+ switch (size) {
+ case 2:
+ val_16 = val;
+ err = copy_to_user(addr, &val_16, sizeof(val_16));
+ break;
+ case 4:
+ err = copy_to_user(addr, &val, sizeof(val));
+ break;
+ default:
+ BUG();
+ }
}
if (err) {
siginfo_t info = {
- .si_signo = SIGSEGV,
- .si_code = SEGV_MAPERR,
- .si_addr = addr
- };
- trace_unhandled_signal("segfault", regs,
- (unsigned long)addr, SIGSEGV);
- force_sig_info(info.si_signo, &info, current);
- return (tile_bundle_bits) 0;
- }
-
- if (unaligned_fixup == 0) {
- siginfo_t info = {
.si_signo = SIGBUS,
.si_code = BUS_ADRALN,
.si_addr = addr
};
- trace_unhandled_signal("unaligned trap", regs,
+ trace_unhandled_signal("bad address for unaligned fixup", regs,
(unsigned long)addr, SIGBUS);
force_sig_info(info.si_signo, &info, current);
- return (tile_bundle_bits) 0;
+ return (tilepro_bundle_bits) 0;
}
if (unaligned_printk || unaligned_fixup_count == 0) {
@@ -269,7 +287,7 @@ void single_step_execve(void)
ti->step_state = NULL;
}
-/**
+/*
* single_step_once() - entry point when single stepping has been triggered.
* @regs: The machine register state
*
@@ -288,20 +306,31 @@ void single_step_execve(void)
*/
void single_step_once(struct pt_regs *regs)
{
- extern tile_bundle_bits __single_step_ill_insn;
- extern tile_bundle_bits __single_step_j_insn;
- extern tile_bundle_bits __single_step_addli_insn;
- extern tile_bundle_bits __single_step_auli_insn;
+ extern tilepro_bundle_bits __single_step_ill_insn;
+ extern tilepro_bundle_bits __single_step_j_insn;
+ extern tilepro_bundle_bits __single_step_addli_insn;
+ extern tilepro_bundle_bits __single_step_auli_insn;
struct thread_info *info = (void *)current_thread_info();
struct single_step_state *state = info->step_state;
int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
- tile_bundle_bits __user *buffer, *pc;
- tile_bundle_bits bundle;
+ tilepro_bundle_bits __user *buffer, *pc;
+ tilepro_bundle_bits bundle;
int temp_reg;
int target_reg = TREG_LR;
int err;
enum mem_op mem_op = MEMOP_NONE;
int size = 0, sign_ext = 0; /* happy compiler */
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
asm(
" .pushsection .rodata.single_step\n"
@@ -338,12 +367,10 @@ void single_step_once(struct pt_regs *regs)
}
/* allocate a cache line of writable, executable memory */
- down_write(&current->mm->mmap_sem);
- buffer = (void __user *) do_mmap(NULL, 0, 64,
+ buffer = (void __user *) vm_mmap(NULL, 0, 64,
PROT_EXEC | PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
0);
- up_write(&current->mm->mmap_sem);
if (IS_ERR((void __force *)buffer)) {
kfree(state);
@@ -376,7 +403,7 @@ void single_step_once(struct pt_regs *regs)
if (regs->faultnum == INT_SWINT_1)
regs->pc -= 8;
- pc = (tile_bundle_bits __user *)(regs->pc);
+ pc = (tilepro_bundle_bits __user *)(regs->pc);
if (get_user(bundle, pc) != 0) {
pr_err("Couldn't read instruction at %p trying to step\n", pc);
return;
@@ -519,7 +546,6 @@ void single_step_once(struct pt_regs *regs)
}
break;
-#if CHIP_HAS_WH64()
/* postincrement operations */
case IMM_0_OPCODE_X1:
switch (get_ImmOpcodeExtension_X1(bundle)) {
@@ -554,7 +580,6 @@ void single_step_once(struct pt_regs *regs)
break;
}
break;
-#endif /* CHIP_HAS_WH64() */
}
if (state->update) {
@@ -613,9 +638,9 @@ void single_step_once(struct pt_regs *regs)
/*
* Check if we need to rewrite an unaligned load/store.
- * Returning zero is a special value meaning we need to SIGSEGV.
+ * Returning zero is a special value meaning we generated a signal.
*/
- if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+ if (mem_op != MEMOP_NONE && align_ctl >= 0) {
bundle = rewrite_load_store_unaligned(state, bundle, regs,
mem_op, size, sign_ext);
if (bundle == 0)
@@ -654,9 +679,9 @@ void single_step_once(struct pt_regs *regs)
}
/* End with a jump back to the next instruction */
- delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+ delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) -
(unsigned long)buffer) >>
- TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+ TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
bundle = __single_step_j_insn;
bundle |= create_JOffLong_X1(delta);
err |= __put_user(bundle, buffer++);
@@ -684,9 +709,6 @@ void single_step_once(struct pt_regs *regs)
}
#else
-#include <linux/smp.h>
-#include <linux/ptrace.h>
-#include <arch/spr_def.h>
static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
@@ -729,10 +751,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
} else if ((*ss_pc != regs->pc) ||
(!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) {
- ptrace_notify(SIGTRAP);
control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
+ send_sigtrap(current, regs);
}
}
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index c52224d5ed4..01e8ab29f43 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -20,8 +20,13 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <asm/cacheflush.h>
+#include <asm/homecache.h>
-HV_Topology smp_topology __write_once;
+/*
+ * We write to width and height with a single store in head_NN.S,
+ * so make the variable aligned to "long".
+ */
+HV_Topology smp_topology __write_once __aligned(sizeof(long));
EXPORT_SYMBOL(smp_topology);
#if CHIP_HAS_IPI()
@@ -87,25 +92,6 @@ void send_IPI_allbutself(int tag)
send_IPI_many(&mask, tag);
}
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
- void *info, bool wait)
-{
- int cpu = get_cpu();
- smp_call_function_many(mask, func, info, wait);
- if (cpumask_test_cpu(cpu, mask)) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- }
- put_cpu();
-}
-
-
/*
* Functions related to starting/stopping cpus.
*/
@@ -119,10 +105,10 @@ static void smp_start_cpu_interrupt(void)
/* Handler to stop the current cpu. */
static void smp_stop_cpu_interrupt(void)
{
- set_cpu_online(smp_processor_id(), 0);
arch_local_irq_disable_all();
+ set_cpu_online(smp_processor_id(), 0);
for (;;)
- asm("nap");
+ asm("nap; nop");
}
/* This function calls the 'stop' function on all other CPUs in the system. */
@@ -132,6 +118,12 @@ void smp_send_stop(void)
send_IPI_allbutself(MSG_TAG_STOP_CPU);
}
+/* On panic, just wait; we may get an smp_send_stop() later on. */
+void panic_smp_self_stop(void)
+{
+ while (1)
+ asm("nap; nop");
+}
/*
* Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages.
@@ -180,9 +172,16 @@ static void ipi_flush_icache_range(void *info)
void flush_icache_range(unsigned long start, unsigned long end)
{
struct ipi_flush flush = { start, end };
- preempt_disable();
- on_each_cpu(ipi_flush_icache_range, &flush, 1);
- preempt_enable();
+
+ /* If invoked with irqs disabled, we can not issue IPIs. */
+ if (irqs_disabled())
+ flush_remote(0, HV_FLUSH_EVICT_L1I, NULL, 0, 0, 0,
+ NULL, NULL, 0);
+ else {
+ preempt_disable();
+ on_each_cpu(ipi_flush_icache_range, &flush, 1);
+ preempt_enable();
+ }
}
@@ -216,7 +215,7 @@ void __init ipi_init(void)
if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0)
panic("Failed to initialize IPI for cpu %d\n", cpu);
- offset = hv_pte_get_pfn(pte) << PAGE_SHIFT;
+ offset = PFN_PHYS(pte_pfn(pte));
ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte);
}
#endif
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index b949edcec20..732e9d13866 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -133,22 +133,24 @@ static __init int reset_init_affinity(void)
}
late_initcall(reset_init_affinity);
-static struct cpumask cpu_started __cpuinitdata;
+static struct cpumask cpu_started;
/*
* Activate a secondary processor. Very minimal; don't add anything
* to this path without knowing what you're doing, since SMP booting
* is pretty fragile.
*/
-static void __cpuinit start_secondary(void)
+static void start_secondary(void)
{
- int cpuid = smp_processor_id();
+ int cpuid;
+
+ preempt_disable();
+
+ cpuid = smp_processor_id();
/* Set our thread pointer appropriately. */
set_my_cpu_offset(__per_cpu_offset[cpuid]);
- preempt_disable();
-
/*
* In large machines even this will slow us down, since we
* will be contending for for the printk spinlock.
@@ -183,7 +185,7 @@ static void __cpuinit start_secondary(void)
/*
* Bring a secondary processor online.
*/
-void __cpuinit online_secondary(void)
+void online_secondary(void)
{
/*
* low-memory mappings have been cleared, flush them from
@@ -196,17 +198,9 @@ void __cpuinit online_secondary(void)
/* This must be done before setting cpu_online_mask */
wmb();
- /*
- * We need to hold call_lock, so there is no inconsistency
- * between the time smp_call_function() determines number of
- * IPI recipients, and the time when the determination is made
- * for which cpus receive the IPI. Holding this
- * lock helps us to not include this cpu in a currently in progress
- * smp_call_function().
- */
- ipi_call_lock();
+ notify_cpu_starting(smp_processor_id());
+
set_cpu_online(smp_processor_id(), 1);
- ipi_call_unlock();
__get_cpu_var(cpu_state) = CPU_ONLINE;
/* Set up tile-specific state for this cpu. */
@@ -215,12 +209,10 @@ void __cpuinit online_secondary(void)
/* Set up tile-timer clock-event device on this cpu */
setup_tile_timer();
- preempt_enable();
-
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
}
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
/* Wait 5s total for all CPUs for them to come online */
static int timeout;
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 37ee4d037e0..c93977a6211 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -21,12 +21,16 @@
#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <linux/mmzone.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/string.h>
#include <asm/backtrace.h>
#include <asm/page.h>
-#include <asm/tlbflush.h>
#include <asm/ucontext.h>
+#include <asm/switch_to.h>
#include <asm/sigframe.h>
#include <asm/stack.h>
+#include <asm/vdso.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
@@ -44,72 +48,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp)
return sp >= kstack_base && sp < kstack_base + THREAD_SIZE;
}
-/* Is address valid for reading? */
-static int valid_address(struct KBacktraceIterator *kbt, unsigned long address)
-{
- HV_PTE *l1_pgtable = kbt->pgtable;
- HV_PTE *l2_pgtable;
- unsigned long pfn;
- HV_PTE pte;
- struct page *page;
-
- if (l1_pgtable == NULL)
- return 0; /* can't read user space in other tasks */
-
-#ifdef CONFIG_64BIT
- /* Find the real l1_pgtable by looking in the l0_pgtable. */
- pte = l1_pgtable[HV_L0_INDEX(address)];
- if (!hv_pte_get_present(pte))
- return 0;
- pfn = hv_pte_get_pfn(pte);
- if (pte_huge(pte)) {
- if (!pfn_valid(pfn)) {
- pr_err("L0 huge page has bad pfn %#lx\n", pfn);
- return 0;
- }
- return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
- }
- page = pfn_to_page(pfn);
- BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */
- l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
-#endif
- pte = l1_pgtable[HV_L1_INDEX(address)];
- if (!hv_pte_get_present(pte))
- return 0;
- pfn = hv_pte_get_pfn(pte);
- if (pte_huge(pte)) {
- if (!pfn_valid(pfn)) {
- pr_err("huge page has bad pfn %#lx\n", pfn);
- return 0;
- }
- return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
- }
-
- page = pfn_to_page(pfn);
- if (PageHighMem(page)) {
- pr_err("L2 page table not in LOWMEM (%#llx)\n",
- HV_PFN_TO_CPA(pfn));
- return 0;
- }
- l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
- pte = l2_pgtable[HV_L2_INDEX(address)];
- return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-}
-
/* Callback for backtracer; basically a glorified memcpy */
static bool read_memory_func(void *result, unsigned long address,
unsigned int size, void *vkbt)
{
int retval;
struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt;
+
+ if (address == 0)
+ return 0;
if (__kernel_text_address(address)) {
/* OK to read kernel code. */
} else if (address >= PAGE_OFFSET) {
/* We only tolerate kernel-space reads of this task's stack */
if (!in_kernel_stack(kbt, address))
return 0;
- } else if (!valid_address(kbt, address)) {
- return 0; /* invalid user-space address */
+ } else if (!kbt->is_current) {
+ return 0; /* can't read from other user address spaces */
}
pagefault_disable();
retval = __copy_from_user_inatomic(result,
@@ -127,6 +82,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
unsigned long sp = kbt->it.sp;
struct pt_regs *p;
+ if (sp % sizeof(long) != 0)
+ return NULL;
if (!in_kernel_stack(kbt, sp))
return NULL;
if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1))
@@ -147,9 +104,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
p->sp >= sp) {
if (kbt->verbose)
pr_err(" <%s while in kernel mode>\n", fault);
- } else if (EX1_PL(p->ex1) == USER_PL &&
- p->pc < PAGE_OFFSET &&
- p->sp < PAGE_OFFSET) {
+ } else if (user_mode(p) &&
+ p->sp < PAGE_OFFSET && p->sp != 0) {
if (kbt->verbose)
pr_err(" <%s while in user mode>\n", fault);
} else if (kbt->verbose) {
@@ -157,7 +113,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
p->pc, p->sp, p->ex1);
p = NULL;
}
- if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0)
+ if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0)
return p;
return NULL;
}
@@ -165,31 +121,31 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
/* Is the pc pointing to a sigreturn trampoline? */
static int is_sigreturn(unsigned long pc)
{
- return (pc == VDSO_BASE);
+ return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
}
/* Return a pt_regs pointer for a valid signal handler frame */
-static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt)
+static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
+ struct rt_sigframe* kframe)
{
BacktraceIterator *b = &kbt->it;
- if (b->pc == VDSO_BASE) {
- struct rt_sigframe *frame;
- unsigned long sigframe_top =
- b->sp + sizeof(struct rt_sigframe) - 1;
- if (!valid_address(kbt, b->sp) ||
- !valid_address(kbt, sigframe_top)) {
- if (kbt->verbose)
- pr_err(" (odd signal: sp %#lx?)\n",
- (unsigned long)(b->sp));
+ if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+ b->sp % sizeof(long) == 0) {
+ int retval;
+ pagefault_disable();
+ retval = __copy_from_user_inatomic(
+ kframe, (void __user __force *)b->sp,
+ sizeof(*kframe));
+ pagefault_enable();
+ if (retval != 0 ||
+ (unsigned int)(kframe->info.si_signo) >= _NSIG)
return NULL;
- }
- frame = (struct rt_sigframe *)b->sp;
if (kbt->verbose) {
pr_err(" <received signal %d>\n",
- frame->info.si_signo);
+ kframe->info.si_signo);
}
- return (struct pt_regs *)&frame->uc.uc_mcontext;
+ return (struct pt_regs *)&kframe->uc.uc_mcontext;
}
return NULL;
}
@@ -202,10 +158,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
{
struct pt_regs *p;
+ struct rt_sigframe kframe;
p = valid_fault_handler(kbt);
if (p == NULL)
- p = valid_sigframe(kbt);
+ p = valid_sigframe(kbt, &kframe);
if (p == NULL)
return 0;
backtrace_init(&kbt->it, read_memory_func, kbt,
@@ -239,21 +196,21 @@ static int KBacktraceIterator_next_item_inclusive(
*/
static void validate_stack(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
unsigned long ksp0 = get_current_ksp0();
- unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+ unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
unsigned long sp = stack_pointer;
if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
else if (sp < ksp0_base + sizeof(struct thread_info)) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
}
@@ -265,41 +222,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
/*
* Set up callback information. We grab the kernel stack base
- * so we will allow reads of that address range, and if we're
- * asking about the current process we grab the page table
- * so we can check user accesses before trying to read them.
- * We flush the TLB to avoid any weird skew issues.
+ * so we will allow reads of that address range.
*/
- is_current = (t == NULL);
+ is_current = (t == NULL || t == current);
kbt->is_current = is_current;
if (is_current)
t = validate_current();
kbt->task = t;
- kbt->pgtable = NULL;
kbt->verbose = 0; /* override in caller if desired */
kbt->profile = 0; /* override in caller if desired */
kbt->end = KBT_ONGOING;
- kbt->new_context = 0;
- if (is_current) {
- HV_PhysAddr pgdir_pa = hv_inquire_context().page_table;
- if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) {
- /*
- * Not just an optimization: this also allows
- * this to work at all before va/pa mappings
- * are set up.
- */
- kbt->pgtable = swapper_pg_dir;
- } else {
- struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa));
- if (!PageHighMem(page))
- kbt->pgtable = __va(pgdir_pa);
- else
- pr_err("page table not in LOWMEM"
- " (%#llx)\n", pgdir_pa);
- }
- local_flush_tlb_all();
+ kbt->new_context = 1;
+ if (is_current)
validate_stack(regs);
- }
if (regs == NULL) {
if (is_current || t->state == TASK_RUNNING) {
@@ -345,6 +280,95 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt)
}
EXPORT_SYMBOL(KBacktraceIterator_next);
+static void describe_addr(struct KBacktraceIterator *kbt,
+ unsigned long address,
+ int have_mmap_sem, char *buf, size_t bufsize)
+{
+ struct vm_area_struct *vma;
+ size_t namelen, remaining;
+ unsigned long size, offset, adjust;
+ char *p, *modname;
+ const char *name;
+ int rc;
+
+ /*
+ * Look one byte back for every caller frame (i.e. those that
+ * aren't a new context) so we look up symbol data for the
+ * call itself, not the following instruction, which may be on
+ * a different line (or in a different function).
+ */
+ adjust = !kbt->new_context;
+ address -= adjust;
+
+ if (address >= PAGE_OFFSET) {
+ /* Handle kernel symbols. */
+ BUG_ON(bufsize < KSYM_NAME_LEN);
+ name = kallsyms_lookup(address, &size, &offset,
+ &modname, buf);
+ if (name == NULL) {
+ buf[0] = '\0';
+ return;
+ }
+ namelen = strlen(buf);
+ remaining = (bufsize - 1) - namelen;
+ p = buf + namelen;
+ rc = snprintf(p, remaining, "+%#lx/%#lx ",
+ offset + adjust, size);
+ if (modname && rc < remaining)
+ snprintf(p + rc, remaining - rc, "[%s] ", modname);
+ buf[bufsize-1] = '\0';
+ return;
+ }
+
+ /* If we don't have the mmap_sem, we can't show any more info. */
+ buf[0] = '\0';
+ if (!have_mmap_sem)
+ return;
+
+ /* Find vma info. */
+ vma = find_vma(kbt->task->mm, address);
+ if (vma == NULL || address < vma->vm_start) {
+ snprintf(buf, bufsize, "[unmapped address] ");
+ return;
+ }
+
+ if (vma->vm_file) {
+ p = d_path(&vma->vm_file->f_path, buf, bufsize);
+ if (IS_ERR(p))
+ p = "?";
+ name = kbasename(p);
+ } else {
+ name = "anon";
+ }
+
+ /* Generate a string description of the vma info. */
+ namelen = strlen(name);
+ remaining = (bufsize - 1) - namelen;
+ memmove(buf, name, namelen);
+ snprintf(buf + namelen, remaining, "[%lx+%lx] ",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+/*
+ * Avoid possible crash recursion during backtrace. If it happens, it
+ * makes it easy to lose the actual root cause of the failure, so we
+ * put a simple guard on all the backtrace loops.
+ */
+static bool start_backtrace(void)
+{
+ if (current->thread.in_backtrace) {
+ pr_err("Backtrace requested while in backtrace!\n");
+ return false;
+ }
+ current->thread.in_backtrace = true;
+ return true;
+}
+
+static void end_backtrace(void)
+{
+ current->thread.in_backtrace = false;
+}
+
/*
* This method wraps the backtracer's more generic support.
* It is only invoked from the architecture-specific code; show_stack()
@@ -353,7 +377,10 @@ EXPORT_SYMBOL(KBacktraceIterator_next);
void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
{
int i;
+ int have_mmap_sem = 0;
+ if (!start_backtrace())
+ return;
if (headers) {
/*
* Add a blank line since if we are called from panic(),
@@ -364,36 +391,21 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
pr_err("Starting stack dump of tid %d, pid %d (%s)"
" on cpu %d at cycle %lld\n",
kbt->task->pid, kbt->task->tgid, kbt->task->comm,
- smp_processor_id(), get_cycles());
+ raw_smp_processor_id(), get_cycles());
}
kbt->verbose = 1;
i = 0;
for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
- char *modname;
- const char *name;
- unsigned long address = kbt->it.pc;
- unsigned long offset, size;
char namebuf[KSYM_NAME_LEN+100];
+ unsigned long address = kbt->it.pc;
- if (address >= PAGE_OFFSET)
- name = kallsyms_lookup(address, &size, &offset,
- &modname, namebuf);
- else
- name = NULL;
-
- if (!name)
- namebuf[0] = '\0';
- else {
- size_t namelen = strlen(namebuf);
- size_t remaining = (sizeof(namebuf) - 1) - namelen;
- char *p = namebuf + namelen;
- int rc = snprintf(p, remaining, "+%#lx/%#lx ",
- offset, size);
- if (modname && rc < remaining)
- snprintf(p + rc, remaining - rc,
- "[%s] ", modname);
- namebuf[sizeof(namebuf)-1] = '\0';
- }
+ /* Try to acquire the mmap_sem as we pass into userspace. */
+ if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+ have_mmap_sem =
+ down_read_trylock(&kbt->task->mm->mmap_sem);
+
+ describe_addr(kbt, address, have_mmap_sem,
+ namebuf, sizeof(namebuf));
pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n",
i++, address, namebuf, (unsigned long)(kbt->it.sp));
@@ -408,6 +420,9 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
pr_err("Stack dump stopped; next frame identical to this one\n");
if (headers)
pr_err("Stack dump complete\n");
+ if (have_mmap_sem)
+ up_read(&kbt->task->mm->mmap_sem);
+ end_backtrace();
}
EXPORT_SYMBOL(tile_show_stack);
@@ -448,7 +463,7 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
regs_to_pt_regs(&regs, pc, lr, sp, r52));
}
-/* This is called only from kernel/sched.c, with esp == NULL */
+/* This is called only from kernel/sched/core.c, with esp == NULL */
void show_stack(struct task_struct *task, unsigned long *esp)
{
struct KBacktraceIterator kbt;
@@ -469,6 +484,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
int skip = trace->skip;
int i = 0;
+ if (!start_backtrace())
+ goto done;
if (task == NULL || task == current)
KBacktraceIterator_init_current(&kbt);
else
@@ -482,6 +499,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
break;
trace->entries[i++] = kbt.it.pc;
}
+ end_backtrace();
+done:
trace->nr_entries = i;
}
EXPORT_SYMBOL(save_stack_trace_tsk);
@@ -490,6 +509,7 @@ void save_stack_trace(struct stack_trace *trace)
{
save_stack_trace_tsk(NULL, trace);
}
+EXPORT_SYMBOL_GPL(save_stack_trace);
#endif
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index cb44ba7ccd2..38debe70606 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -32,11 +32,19 @@
#include <asm/syscalls.h>
#include <asm/pgtable.h>
#include <asm/homecache.h>
+#include <asm/cachectl.h>
#include <arch/chip.h>
-SYSCALL_DEFINE0(flush_cache)
+SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len,
+ unsigned long, flags)
{
- homecache_evict(cpumask_of(smp_processor_id()));
+ /* DCACHE is not particularly effective if not bound to one cpu. */
+ if (flags & DCACHE)
+ homecache_evict(cpumask_of(raw_smp_processor_id()));
+
+ if (flags & ICACHE)
+ flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm),
+ 0, 0, 0, NULL, NULL, 0);
return 0;
}
@@ -100,14 +108,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
#define sys_readahead sys32_readahead
#endif
-/* Call the trampolines to manage pt_regs where necessary. */
-#define sys_execve _sys_execve
-#define sys_sigaltstack _sys_sigaltstack
+/* Call the assembly trampolines where necessary. */
+#undef sys_rt_sigreturn
#define sys_rt_sigreturn _sys_rt_sigreturn
#define sys_clone _sys_clone
-#ifndef __tilegx__
-#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr
-#endif
/*
* Note that we can't include <linux/unistd.h> here since the header
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index 71ae728e9d0..a3ed12f8f83 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -93,6 +93,10 @@ HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM)
HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM)
HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV)
HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC)
+HV_CONF_ATTR(cpumod_part, HV_CONFSTR_CPUMOD_PART_NUM)
+HV_CONF_ATTR(cpumod_serial, HV_CONFSTR_CPUMOD_SERIAL_NUM)
+HV_CONF_ATTR(cpumod_revision, HV_CONFSTR_CPUMOD_REV)
+HV_CONF_ATTR(cpumod_description,HV_CONFSTR_CPUMOD_DESC)
HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL)
static struct attribute *board_attrs[] = {
@@ -104,6 +108,10 @@ static struct attribute *board_attrs[] = {
&dev_attr_mezz_serial.attr,
&dev_attr_mezz_revision.attr,
&dev_attr_mezz_description.attr,
+ &dev_attr_cpumod_part.attr,
+ &dev_attr_cpumod_serial.attr,
+ &dev_attr_cpumod_revision.attr,
+ &dev_attr_cpumod_description.attr,
&dev_attr_switch_control.attr,
NULL
};
@@ -149,6 +157,67 @@ hvconfig_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
+static ssize_t hv_stats_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ int cpu = dev->id;
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS,
+ (unsigned long)page, PAGE_SIZE - 1,
+ lotar, 0);
+ n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1);
+ page[n] = '\0';
+ return n;
+}
+
+static ssize_t hv_stats_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *page,
+ size_t count)
+{
+ int cpu = dev->id;
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, 0, 0, lotar, 1);
+ return n < 0 ? n : count;
+}
+
+static DEVICE_ATTR(hv_stats, 0644, hv_stats_show, hv_stats_store);
+
+static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif)
+{
+ int err, cpu = dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ err = sysfs_create_file(&dev->kobj, &dev_attr_hv_stats.attr);
+
+ return err;
+}
+
+static int hv_stats_device_remove(struct device *dev,
+ struct subsys_interface *sif)
+{
+ int cpu = dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
+ return 0;
+}
+
+
+static struct subsys_interface hv_stats_interface = {
+ .name = "hv_stats",
+ .subsys = &cpu_subsys,
+ .add_dev = hv_stats_device_add,
+ .remove_dev = hv_stats_device_remove,
+};
+
static int __init create_sysfs_entries(void)
{
int err = 0;
@@ -180,6 +249,21 @@ static int __init create_sysfs_entries(void)
err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin);
}
+ if (!err) {
+ /*
+ * Don't bother adding the hv_stats files on each CPU if
+ * our hypervisor doesn't supply statistics.
+ */
+ int cpu = raw_smp_processor_id();
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+ char dummy;
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS,
+ (unsigned long) &dummy, 1,
+ lotar, 0);
+ if (n >= 0)
+ err = subsys_interface_register(&hv_stats_interface);
+ }
+
return err;
}
subsys_initcall(create_sysfs_entries);
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index f6f50f2a5e3..462dcd0c170 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -23,8 +23,10 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/module.h>
+#include <linux/timekeeper_internal.h>
#include <asm/irq_regs.h>
#include <asm/traps.h>
+#include <asm/vdso.h>
#include <hv/hypervisor.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
@@ -110,7 +112,6 @@ void __init time_init(void)
setup_tile_timer();
}
-
/*
* Define the tile timer clock event device. The timer is driven by
* the TILE_TIMER_CONTROL register, which consists of a 31-bit down
@@ -159,7 +160,7 @@ static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = {
.set_mode = tile_timer_set_mode,
};
-void __cpuinit setup_tile_timer(void)
+void setup_tile_timer(void)
{
struct clock_event_device *evt = &__get_cpu_var(tile_timer);
@@ -230,6 +231,52 @@ int setup_profiling_timer(unsigned int multiplier)
*/
cycles_t ns2cycles(unsigned long nsecs)
{
- struct clock_event_device *dev = &__get_cpu_var(tile_timer);
- return ((u64)nsecs * dev->mult) >> dev->shift;
+ /*
+ * We do not have to disable preemption here as each core has the same
+ * clock frequency.
+ */
+ struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer);
+
+ /*
+ * as in clocksource.h and x86's timer.h, we split the calculation
+ * into 2 parts to avoid unecessary overflow of the intermediate
+ * value. This will not lead to any loss of precision.
+ */
+ u64 quot = (u64)nsecs >> dev->shift;
+ u64 rem = (u64)nsecs & ((1ULL << dev->shift) - 1);
+ return quot * dev->mult + ((rem * dev->mult) >> dev->shift);
+}
+
+void update_vsyscall_tz(void)
+{
+ /* Userspace gettimeofday will spin while this value is odd. */
+ ++vdso_data->tz_update_count;
+ smp_wmb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_wmb();
+ ++vdso_data->tz_update_count;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct timespec wall_time = tk_xtime(tk);
+ struct timespec *wtm = &tk->wall_to_monotonic;
+ struct clocksource *clock = tk->clock;
+
+ if (clock != &cycle_counter_cs)
+ return;
+
+ /* Userspace gettimeofday will spin while this value is odd. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = clock->cycle_last;
+ vdso_data->xtime_clock_sec = wall_time.tv_sec;
+ vdso_data->xtime_clock_nsec = wall_time.tv_nsec;
+ vdso_data->wtom_clock_sec = wtm->tv_sec;
+ vdso_data->wtom_clock_nsec = wtm->tv_nsec;
+ vdso_data->mult = clock->mult;
+ vdso_data->shift = clock->shift;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
}
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index a5f241c24ca..f23b5351567 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/hugetlb.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>
#include <hv/hypervisor.h>
@@ -49,25 +50,25 @@ void flush_tlb_current_task(void)
flush_tlb_mm(current->mm);
}
-void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm,
+void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm,
unsigned long va)
{
- unsigned long size = hv_page_size(vma);
+ unsigned long size = vma_kernel_pagesize(vma);
int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
flush_remote(0, cache, mm_cpumask(mm),
va, size, size, mm_cpumask(mm), NULL, 0);
}
-void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
flush_tlb_page_mm(vma, vma->vm_mm, va);
}
EXPORT_SYMBOL(flush_tlb_page);
-void flush_tlb_range(const struct vm_area_struct *vma,
+void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- unsigned long size = hv_page_size(vma);
+ unsigned long size = vma_kernel_pagesize(vma);
struct mm_struct *mm = vma->vm_mm;
int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
flush_remote(0, cache, mm_cpumask(mm), start, end - start, size,
@@ -90,8 +91,14 @@ void flush_tlb_all(void)
}
}
+/*
+ * Callers need to flush the L1I themselves if necessary, e.g. for
+ * kernel module unload. Otherwise we assume callers are not using
+ * executable pgprot_t's. Using EVICT_L1I means that dataplane cpus
+ * will get an unnecessary interrupt otherwise.
+ */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+ flush_remote(0, 0, NULL,
start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0);
}
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 4f47b8a356d..f3ceb6308e4 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -15,12 +15,14 @@
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
+#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <asm/stack.h>
#include <asm/traps.h>
+#include <asm/setup.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
@@ -28,7 +30,7 @@
void __init trap_init(void)
{
- /* Nothing needed here since we link code at .intrpt1 */
+ /* Nothing needed here since we link code at .intrpt */
}
int unaligned_fixup = 1;
@@ -40,10 +42,9 @@ static int __init setup_unaligned_fixup(char *str)
* will still parse the instruction, then fire a SIGBUS with
* the correct address from inside the single_step code.
*/
- long val;
- if (strict_strtol(str, 0, &val) != 0)
+ if (kstrtoint(str, 0, &unaligned_fixup) != 0)
return 0;
- unaligned_fixup = val;
+
pr_info("Fixups for unaligned data accesses are %s\n",
unaligned_fixup >= 0 ?
(unaligned_fixup ? "enabled" : "disabled") :
@@ -99,13 +100,7 @@ static int retry_gpv(unsigned int gpv_reason)
#endif /* CHIP_HAS_TILE_DMA() */
-#ifdef __tilegx__
-#define bundle_bits tilegx_bundle_bits
-#else
-#define bundle_bits tile_bundle_bits
-#endif
-
-extern bundle_bits bpt_code;
+extern tile_bundle_bits bpt_code;
asm(".pushsection .rodata.bpt_code,\"a\";"
".align 8;"
@@ -113,7 +108,7 @@ asm(".pushsection .rodata.bpt_code,\"a\";"
".size bpt_code,.-bpt_code;"
".popsection");
-static int special_ill(bundle_bits bundle, int *sigp, int *codep)
+static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep)
{
int sig, code, maxcode;
@@ -194,34 +189,119 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep)
return 1;
}
+static const char *const int_name[] = {
+ [INT_MEM_ERROR] = "Memory error",
+ [INT_ILL] = "Illegal instruction",
+ [INT_GPV] = "General protection violation",
+ [INT_UDN_ACCESS] = "UDN access",
+ [INT_IDN_ACCESS] = "IDN access",
+#if CHIP_HAS_SN()
+ [INT_SN_ACCESS] = "SN access",
+#endif
+ [INT_SWINT_3] = "Software interrupt 3",
+ [INT_SWINT_2] = "Software interrupt 2",
+ [INT_SWINT_0] = "Software interrupt 0",
+ [INT_UNALIGN_DATA] = "Unaligned data",
+ [INT_DOUBLE_FAULT] = "Double fault",
+#ifdef __tilegx__
+ [INT_ILL_TRANS] = "Illegal virtual address",
+#endif
+};
+
+static int do_bpt(struct pt_regs *regs)
+{
+ unsigned long bundle, bcode, bpt;
+
+ bundle = *(unsigned long *)instruction_pointer(regs);
+
+ /*
+ * bpt shoule be { bpt; nop }, which is 0x286a44ae51485000ULL.
+ * we encode the unused least significant bits for other purpose.
+ */
+ bpt = bundle & ~((1ULL << 12) - 1);
+ if (bpt != TILE_BPT_BUNDLE)
+ return 0;
+
+ bcode = bundle & ((1ULL << 12) - 1);
+ /*
+ * notify the kprobe handlers, if instruction is likely to
+ * pertain to them.
+ */
+ switch (bcode) {
+ /* breakpoint_insn */
+ case 0:
+ notify_die(DIE_BREAK, "debug", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ /* compiled_bpt */
+ case DIE_COMPILED_BPT:
+ notify_die(DIE_COMPILED_BPT, "debug", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ /* breakpoint2_insn */
+ case DIE_SSTEPBP:
+ notify_die(DIE_SSTEPBP, "single_step", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
void __kprobes do_trap(struct pt_regs *regs, int fault_num,
unsigned long reason)
{
siginfo_t info = { 0 };
int signo, code;
- unsigned long address;
- bundle_bits instr;
+ unsigned long address = 0;
+ tile_bundle_bits instr;
+ int is_kernel = !user_mode(regs);
+
+ /* Handle breakpoints, etc. */
+ if (is_kernel && fault_num == INT_ILL && do_bpt(regs))
+ return;
- /* Re-enable interrupts. */
- local_irq_enable();
+ /* Re-enable interrupts, if they were previously enabled. */
+ if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+ local_irq_enable();
/*
* If it hits in kernel mode and we can't fix it up, just exit the
* current process and hope for the best.
*/
- if (!user_mode(regs)) {
- if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */
+ if (is_kernel) {
+ const char *name;
+ char buf[100];
+ if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */
return;
- pr_alert("Kernel took bad trap %d at PC %#lx\n",
- fault_num, regs->pc);
+ if (fault_num >= 0 &&
+ fault_num < sizeof(int_name)/sizeof(int_name[0]) &&
+ int_name[fault_num] != NULL)
+ name = int_name[fault_num];
+ else
+ name = "Unknown interrupt";
if (fault_num == INT_GPV)
- pr_alert("GPV_REASON is %#lx\n", reason);
+ snprintf(buf, sizeof(buf), "; GPV_REASON %#lx", reason);
+#ifdef __tilegx__
+ else if (fault_num == INT_ILL_TRANS)
+ snprintf(buf, sizeof(buf), "; address %#lx", reason);
+#endif
+ else
+ buf[0] = '\0';
+ pr_alert("Kernel took bad trap %d (%s) at PC %#lx%s\n",
+ fault_num, name, regs->pc, buf);
show_regs(regs);
do_exit(SIGKILL); /* FIXME: implement i386 die() */
return;
}
switch (fault_num) {
+ case INT_MEM_ERROR:
+ signo = SIGBUS;
+ code = BUS_OBJERR;
+ break;
case INT_ILL:
if (copy_from_user(&instr, (void __user *)regs->pc,
sizeof(instr))) {
@@ -288,14 +368,15 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
address = regs->pc;
break;
#ifdef __tilegx__
- case INT_ILL_TRANS:
+ case INT_ILL_TRANS: {
+ /* Avoid a hardware erratum with the return address stack. */
+ fill_ra_stack();
+
signo = SIGSEGV;
+ address = reason;
code = SEGV_MAPERR;
- if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
- address = regs->pc;
- else
- address = 0; /* FIXME: GX: single-step for address */
break;
+ }
#endif
default:
panic("Unexpected do_trap interrupt number %d", fault_num);
@@ -307,7 +388,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
info.si_addr = (void __user *)address;
if (signo == SIGILL)
info.si_trapno = fault_num;
- trace_unhandled_signal("trap", regs, address, signo);
+ if (signo != SIGTRAP)
+ trace_unhandled_signal("trap", regs, address, signo);
force_sig_info(signo, &info, current);
}
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
new file mode 100644
index 00000000000..c02ea2a45f6
--- /dev/null
+++ b/arch/tile/kernel/unaligned.c
@@ -0,0 +1,1598 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * A code-rewriter that handles unaligned exception.
+ */
+
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/prctl.h>
+#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <arch/abi.h>
+#include <arch/spr_def.h>
+#include <arch/opcode.h>
+
+
+/*
+ * This file handles unaligned exception for tile-Gx. The tilepro's unaligned
+ * exception is supported out of single_step.c
+ */
+
+int unaligned_printk;
+
+static int __init setup_unaligned_printk(char *str)
+{
+ long val;
+ if (kstrtol(str, 0, &val) != 0)
+ return 0;
+ unaligned_printk = val;
+ pr_info("Printk for each unaligned data accesses is %s\n",
+ unaligned_printk ? "enabled" : "disabled");
+ return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+
+unsigned int unaligned_fixup_count;
+
+#ifdef __tilegx__
+
+/*
+ * Unalign data jit fixup code fragement. Reserved space is 128 bytes.
+ * The 1st 64-bit word saves fault PC address, 2nd word is the fault
+ * instruction bundle followed by 14 JIT bundles.
+ */
+
+struct unaligned_jit_fragment {
+ unsigned long pc;
+ tilegx_bundle_bits bundle;
+ tilegx_bundle_bits insn[14];
+};
+
+/*
+ * Check if a nop or fnop at bundle's pipeline X0.
+ */
+
+static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_X0(bundle) ==
+ NOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)) ||
+ ((get_UnaryOpcodeExtension_X0(bundle) ==
+ FNOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)));
+}
+
+/*
+ * Check if nop or fnop at bundle's pipeline X1.
+ */
+
+static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_X1(bundle) ==
+ NOP_UNARY_OPCODE_X1) &&
+ (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) &&
+ (get_Opcode_X1(bundle) ==
+ RRR_0_OPCODE_X1)) ||
+ ((get_UnaryOpcodeExtension_X1(bundle) ==
+ FNOP_UNARY_OPCODE_X1) &&
+ (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) &&
+ (get_Opcode_X1(bundle) ==
+ RRR_0_OPCODE_X1)));
+}
+
+/*
+ * Check if nop or fnop at bundle's Y0 pipeline.
+ */
+
+static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_Y0(bundle) ==
+ NOP_UNARY_OPCODE_Y0) &&
+ (get_RRROpcodeExtension_Y0(bundle) ==
+ UNARY_RRR_1_OPCODE_Y0) &&
+ (get_Opcode_Y0(bundle) ==
+ RRR_1_OPCODE_Y0)) ||
+ ((get_UnaryOpcodeExtension_Y0(bundle) ==
+ FNOP_UNARY_OPCODE_Y0) &&
+ (get_RRROpcodeExtension_Y0(bundle) ==
+ UNARY_RRR_1_OPCODE_Y0) &&
+ (get_Opcode_Y0(bundle) ==
+ RRR_1_OPCODE_Y0)));
+}
+
+/*
+ * Check if nop or fnop at bundle's pipeline Y1.
+ */
+
+static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_Y1(bundle) ==
+ NOP_UNARY_OPCODE_Y1) &&
+ (get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) &&
+ (get_Opcode_Y1(bundle) ==
+ RRR_1_OPCODE_Y1)) ||
+ ((get_UnaryOpcodeExtension_Y1(bundle) ==
+ FNOP_UNARY_OPCODE_Y1) &&
+ (get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) &&
+ (get_Opcode_Y1(bundle) ==
+ RRR_1_OPCODE_Y1)));
+}
+
+/*
+ * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
+ */
+
+static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
+{
+ return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
+}
+
+/*
+ * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
+ */
+
+static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
+{
+ return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
+}
+
+/*
+ * Find the destination, source registers of fault unalign access instruction
+ * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and
+ * clob3, which are guaranteed different from any register used in the fault
+ * bundle. r_alias is used to return if the other instructions other than the
+ * unalign load/store shares same register with ra, rb and rd.
+ */
+
+static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
+ uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
+ uint64_t *clob3, bool *r_alias)
+{
+ int i;
+ uint64_t reg;
+ uint64_t reg_map = 0, alias_reg_map = 0, map;
+ bool alias = false;
+
+ /*
+ * Parse fault bundle, find potential used registers and mark
+ * corresponding bits in reg_map and alias_map. These 2 bit maps
+ * are used to find the scratch registers and determine if there
+ * is register alais.
+ */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */
+
+ reg = get_SrcA_Y2(bundle);
+ reg_map |= 1ULL << reg;
+ *ra = reg;
+ reg = get_SrcBDest_Y2(bundle);
+ reg_map |= 1ULL << reg;
+
+ if (rd) {
+ /* Load. */
+ *rd = reg;
+ alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
+ } else {
+ /* Store. */
+ *rb = reg;
+ alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
+ }
+
+ if (!is_bundle_y1_nop(bundle)) {
+ reg = get_SrcA_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+
+ if (!is_bundle_y0_nop(bundle)) {
+ reg = get_SrcA_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+ } else { /* X Mode Bundle. */
+
+ reg = get_SrcA_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *ra = reg;
+ if (rd) {
+ /* Load. */
+ reg = get_Dest_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *rd = reg;
+ alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
+ } else {
+ /* Store. */
+ reg = get_SrcB_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *rb = reg;
+ alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
+ }
+
+ if (!is_bundle_x0_nop(bundle)) {
+ reg = get_SrcA_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+ }
+
+ /*
+ * "alias" indicates if the unalign access registers have collision
+ * with others in the same bundle. We jsut simply test all register
+ * operands case (RRR), ignored the case with immidate. If a bundle
+ * has no register alias, we may do fixup in a simple or fast manner.
+ * So if an immidata field happens to hit with a register, we may end
+ * up fall back to the generic handling.
+ */
+
+ *r_alias = alias;
+
+ /* Flip bits on reg_map. */
+ reg_map ^= -1ULL;
+
+ /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */
+ for (i = 0; i < TREG_SP; i++) {
+ if (reg_map & (0x1ULL << i)) {
+ if (*clob1 == -1) {
+ *clob1 = i;
+ } else if (*clob2 == -1) {
+ *clob2 = i;
+ } else if (*clob3 == -1) {
+ *clob3 = i;
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
+ * is unexpected.
+ */
+
+static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
+ uint64_t clob1, uint64_t clob2, uint64_t clob3)
+{
+ bool unexpected = false;
+ if ((ra >= 56) && (ra != TREG_ZERO))
+ unexpected = true;
+
+ if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
+ unexpected = true;
+
+ if (rd != -1) {
+ if ((rd >= 56) && (rd != TREG_ZERO))
+ unexpected = true;
+ } else {
+ if ((rb >= 56) && (rb != TREG_ZERO))
+ unexpected = true;
+ }
+ return unexpected;
+}
+
+
+#define GX_INSN_X0_MASK ((1ULL << 31) - 1)
+#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
+#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
+#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
+#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))
+
+#ifdef __LITTLE_ENDIAN
+#define GX_INSN_BSWAP(_bundle_) (_bundle_)
+#else
+#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
+#endif /* __LITTLE_ENDIAN */
+
+/*
+ * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
+ * The corresponding static function jix_x#_###(.) generates partial or
+ * whole bundle based on the template and given arguments.
+ */
+
+#define __JIT_CODE(_X_) \
+ asm (".pushsection .rodata.unalign_data, \"a\"\n" \
+ _X_"\n" \
+ ".popsection\n")
+
+__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
+static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
+ create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
+}
+
+__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
+static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
+ create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
+}
+
+__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_addi;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_Imm8_X0(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
+static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ldna;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra);
+}
+
+__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
+static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_SrcB_X0(rb);
+}
+
+__JIT_CODE("__unalign_jit_x1_iret: {iret}");
+static tilegx_bundle_bits jit_x1_iret(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_iret;
+ return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
+static tilegx_bundle_bits jit_x0_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x01_fnop;
+ return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
+}
+
+static tilegx_bundle_bits jit_x1_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x01_fnop;
+ return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
+static tilegx_bundle_bits jit_y2_dummy(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_y2_dummy;
+ return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
+}
+
+static tilegx_bundle_bits jit_y1_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_y2_dummy;
+ return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
+ (~create_SrcA_X1(-1)) &
+ GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
+ create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
+static tilegx_bundle_bits jit_x1_st(int ra, int rb)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
+ create_SrcA_X1(ra) | create_SrcB_X1(rb);
+}
+
+__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
+ (~create_SrcA_X1(-1)) &
+ GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
+ create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
+static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ld;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra);
+}
+
+__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
+ (~create_Dest_X1(-1)) &
+ GX_INSN_X1_MASK) | create_Dest_X1(rd) |
+ create_SrcA_X1(ra) | create_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
+static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
+}
+
+__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
+static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
+}
+
+__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
+static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_addi;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra) |
+ create_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_shrui;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_ShAmt_X0(imm6);
+}
+
+__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_rotli;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_ShAmt_X0(imm6);
+}
+
+__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
+static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
+ GX_INSN_X1_MASK) |
+ create_SrcA_X1(ra) | create_BrOff_X1(broff);
+}
+
+#undef __JIT_CODE
+
+/*
+ * This function generates unalign fixup JIT.
+ *
+ * We first find unalign load/store instruction's destination, source
+ * registers: ra, rb and rd. and 3 scratch registers by calling
+ * find_regs(...). 3 scratch clobbers should not alias with any register
+ * used in the fault bundle. Then analyze the fault bundle to determine
+ * if it's a load or store, operand width, branch or address increment etc.
+ * At last generated JIT is copied into JIT code area in user space.
+ */
+
+static
+void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
+ int align_ctl)
+{
+ struct thread_info *info = current_thread_info();
+ struct unaligned_jit_fragment frag;
+ struct unaligned_jit_fragment *jit_code_area;
+ tilegx_bundle_bits bundle_2 = 0;
+ /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
+ bool bundle_2_enable = true;
+ uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
+ /*
+ * Indicate if the unalign access
+ * instruction's registers hit with
+ * others in the same bundle.
+ */
+ bool alias = false;
+ bool load_n_store = true;
+ bool load_store_signed = false;
+ unsigned int load_store_size = 8;
+ bool y1_br = false; /* True, for a branch in same bundle at Y1.*/
+ int y1_br_reg = 0;
+ /* True for link operation. i.e. jalr or lnk at Y1 */
+ bool y1_lr = false;
+ int y1_lr_reg = 0;
+ bool x1_add = false;/* True, for load/store ADD instruction at X1*/
+ int x1_add_imm8 = 0;
+ bool unexpected = false;
+ int n = 0, k;
+
+ jit_code_area =
+ (struct unaligned_jit_fragment *)(info->unalign_jit_base);
+
+ memset((void *)&frag, 0, sizeof(frag));
+
+ /* 0: X mode, Otherwise: Y mode. */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ unsigned int mod, opcode;
+
+ if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
+ get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) {
+
+ opcode = get_UnaryOpcodeExtension_Y1(bundle);
+
+ /*
+ * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
+ * pipeline.
+ */
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_Y1:
+ case JALRP_UNARY_OPCODE_Y1:
+ y1_lr = true;
+ y1_lr_reg = 55; /* Link register. */
+ /* FALLTHROUGH */
+ case JR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ y1_br = true;
+ y1_br_reg = get_SrcA_Y1(bundle);
+ break;
+ case LNK_UNARY_OPCODE_Y1:
+ /* "lnk" at Y1 pipeline. */
+ y1_lr = true;
+ y1_lr_reg = get_Dest_Y1(bundle);
+ break;
+ }
+ }
+
+ opcode = get_Opcode_Y2(bundle);
+ mod = get_Mode(bundle);
+
+ /*
+ * bundle_2 is bundle after making Y2 as a dummy operation
+ * - ld zero, sp
+ */
+ bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
+
+ /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
+ if (y1_br || y1_lr) {
+ bundle_2 &= ~(GX_INSN_Y1_MASK);
+ bundle_2 |= jit_y1_fnop();
+ }
+
+ if (is_y0_y1_nop(bundle_2))
+ bundle_2_enable = false;
+
+ if (mod == MODE_OPCODE_YC2) {
+ /* Store. */
+ load_n_store = false;
+ load_store_size = 1 << opcode;
+ load_store_signed = false;
+ find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
+ &clob3, &alias);
+ if (load_store_size > 8)
+ unexpected = true;
+ } else {
+ /* Load. */
+ load_n_store = true;
+ if (mod == MODE_OPCODE_YB2) {
+ switch (opcode) {
+ case LD_OPCODE_Y2:
+ load_store_signed = false;
+ load_store_size = 8;
+ break;
+ case LD4S_OPCODE_Y2:
+ load_store_signed = true;
+ load_store_size = 4;
+ break;
+ case LD4U_OPCODE_Y2:
+ load_store_signed = false;
+ load_store_size = 4;
+ break;
+ default:
+ unexpected = true;
+ }
+ } else if (mod == MODE_OPCODE_YA2) {
+ if (opcode == LD2S_OPCODE_Y2) {
+ load_store_signed = true;
+ load_store_size = 2;
+ } else if (opcode == LD2U_OPCODE_Y2) {
+ load_store_signed = false;
+ load_store_size = 2;
+ } else
+ unexpected = true;
+ } else
+ unexpected = true;
+ find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
+ &clob3, &alias);
+ }
+ } else {
+ unsigned int opcode;
+
+ /* bundle_2 is bundle after making X1 as "fnop". */
+ bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
+
+ if (is_x0_x1_nop(bundle_2))
+ bundle_2_enable = false;
+
+ if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
+ opcode = get_UnaryOpcodeExtension_X1(bundle);
+
+ if (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) {
+ load_n_store = true;
+ find_regs(bundle, &rd, &ra, &rb, &clob1,
+ &clob2, &clob3, &alias);
+
+ switch (opcode) {
+ case LD_UNARY_OPCODE_X1:
+ load_store_signed = false;
+ load_store_size = 8;
+ break;
+ case LD4S_UNARY_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD4U_UNARY_OPCODE_X1:
+ load_store_size = 4;
+ break;
+
+ case LD2S_UNARY_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD2U_UNARY_OPCODE_X1:
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+ } else {
+ load_n_store = false;
+ load_store_signed = false;
+ find_regs(bundle, 0, &ra, &rb,
+ &clob1, &clob2, &clob3,
+ &alias);
+
+ opcode = get_RRROpcodeExtension_X1(bundle);
+ switch (opcode) {
+ case ST_RRR_0_OPCODE_X1:
+ load_store_size = 8;
+ break;
+ case ST4_RRR_0_OPCODE_X1:
+ load_store_size = 4;
+ break;
+ case ST2_RRR_0_OPCODE_X1:
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
+ load_n_store = true;
+ opcode = get_Imm8OpcodeExtension_X1(bundle);
+ switch (opcode) {
+ case LD_ADD_IMM8_OPCODE_X1:
+ load_store_size = 8;
+ break;
+
+ case LD4S_ADD_IMM8_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD4U_ADD_IMM8_OPCODE_X1:
+ load_store_size = 4;
+ break;
+
+ case LD2S_ADD_IMM8_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD2U_ADD_IMM8_OPCODE_X1:
+ load_store_size = 2;
+ break;
+
+ case ST_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 8;
+ break;
+ case ST4_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 4;
+ break;
+ case ST2_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+
+ if (!unexpected) {
+ x1_add = true;
+ if (load_n_store)
+ x1_add_imm8 = get_Imm8_X1(bundle);
+ else
+ x1_add_imm8 = get_Dest_Imm8_X1(bundle);
+ }
+
+ find_regs(bundle, load_n_store ? (&rd) : NULL,
+ &ra, &rb, &clob1, &clob2, &clob3, &alias);
+ } else
+ unexpected = true;
+ }
+
+ /*
+ * Some sanity check for register numbers extracted from fault bundle.
+ */
+ if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
+ unexpected = true;
+
+ /* Give warning if register ra has an aligned address. */
+ if (!unexpected)
+ WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
+
+
+ /*
+ * Fault came from kernel space, here we only need take care of
+ * unaligned "get_user/put_user" macros defined in "uaccess.h".
+ * Basically, we will handle bundle like this:
+ * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
+ * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
+ * For either load or store, byte-wise operation is performed by calling
+ * get_user() or put_user(). If the macro returns non-zero value,
+ * set the value to rx, otherwise set zero to rx. Finally make pc point
+ * to next bundle and return.
+ */
+
+ if (EX1_PL(regs->ex1) != USER_PL) {
+
+ unsigned long rx = 0;
+ unsigned long x = 0, ret = 0;
+
+ if (y1_br || y1_lr || x1_add ||
+ (load_store_signed !=
+ (load_n_store && load_store_size == 4))) {
+ /* No branch, link, wrong sign-ext or load/store add. */
+ unexpected = true;
+ } else if (!unexpected) {
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ /*
+ * Fault bundle is Y mode.
+ * Check if the Y1 and Y0 is the form of
+ * { movei rx, 0; nop/fnop }, if yes,
+ * find the rx.
+ */
+
+ if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
+ && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
+ (get_Imm8_Y1(bundle) == 0) &&
+ is_bundle_y0_nop(bundle)) {
+ rx = get_Dest_Y1(bundle);
+ } else if ((get_Opcode_Y0(bundle) ==
+ ADDI_OPCODE_Y0) &&
+ (get_SrcA_Y0(bundle) == TREG_ZERO) &&
+ (get_Imm8_Y0(bundle) == 0) &&
+ is_bundle_y1_nop(bundle)) {
+ rx = get_Dest_Y0(bundle);
+ } else {
+ unexpected = true;
+ }
+ } else {
+ /*
+ * Fault bundle is X mode.
+ * Check if the X0 is 'movei rx, 0',
+ * if yes, find the rx.
+ */
+
+ if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
+ && (get_Imm8OpcodeExtension_X0(bundle) ==
+ ADDI_IMM8_OPCODE_X0) &&
+ (get_SrcA_X0(bundle) == TREG_ZERO) &&
+ (get_Imm8_X0(bundle) == 0)) {
+ rx = get_Dest_X0(bundle);
+ } else {
+ unexpected = true;
+ }
+ }
+
+ /* rx should be less than 56. */
+ if (!unexpected && (rx >= 56))
+ unexpected = true;
+ }
+
+ if (!search_exception_tables(regs->pc)) {
+ /* No fixup in the exception tables for the pc. */
+ unexpected = true;
+ }
+
+ if (unexpected) {
+ /* Unexpected unalign kernel fault. */
+ struct task_struct *tsk = validate_current();
+
+ bust_spinlocks(1);
+
+ show_regs(regs);
+
+ if (unlikely(tsk->pid < 2)) {
+ panic("Kernel unalign fault running %s!",
+ tsk->pid ? "init" : "the idle task");
+ }
+#ifdef SUPPORT_DIE
+ die("Oops", regs);
+#endif
+ bust_spinlocks(1);
+
+ do_group_exit(SIGKILL);
+
+ } else {
+ unsigned long i, b = 0;
+ unsigned char *ptr =
+ (unsigned char *)regs->regs[ra];
+ if (load_n_store) {
+ /* handle get_user(x, ptr) */
+ for (i = 0; i < load_store_size; i++) {
+ ret = get_user(b, ptr++);
+ if (!ret) {
+ /* Success! update x. */
+#ifdef __LITTLE_ENDIAN
+ x |= (b << (8 * i));
+#else
+ x <<= 8;
+ x |= b;
+#endif /* __LITTLE_ENDIAN */
+ } else {
+ x = 0;
+ break;
+ }
+ }
+
+ /* Sign-extend 4-byte loads. */
+ if (load_store_size == 4)
+ x = (long)(int)x;
+
+ /* Set register rd. */
+ regs->regs[rd] = x;
+
+ /* Set register rx. */
+ regs->regs[rx] = ret;
+
+ /* Bump pc. */
+ regs->pc += 8;
+
+ } else {
+ /* Handle put_user(x, ptr) */
+ x = regs->regs[rb];
+#ifdef __LITTLE_ENDIAN
+ b = x;
+#else
+ /*
+ * Swap x in order to store x from low
+ * to high memory same as the
+ * little-endian case.
+ */
+ switch (load_store_size) {
+ case 8:
+ b = swab64(x);
+ break;
+ case 4:
+ b = swab32(x);
+ break;
+ case 2:
+ b = swab16(x);
+ break;
+ }
+#endif /* __LITTLE_ENDIAN */
+ for (i = 0; i < load_store_size; i++) {
+ ret = put_user(b, ptr++);
+ if (ret)
+ break;
+ /* Success! shift 1 byte. */
+ b >>= 8;
+ }
+ /* Set register rx. */
+ regs->regs[rx] = ret;
+
+ /* Bump pc. */
+ regs->pc += 8;
+ }
+ }
+
+ unaligned_fixup_count++;
+
+ if (unaligned_printk) {
+ pr_info("%s/%d. Unalign fixup for kernel access "
+ "to userspace %lx.",
+ current->comm, current->pid, regs->regs[ra]);
+ }
+
+ /* Done! Return to the exception handler. */
+ return;
+ }
+
+ if ((align_ctl == 0) || unexpected) {
+ siginfo_t info = {
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
+ .si_addr = (unsigned char __user *)0
+ };
+ if (unaligned_printk)
+ pr_info("Unalign bundle: unexp @%llx, %llx",
+ (unsigned long long)regs->pc,
+ (unsigned long long)bundle);
+
+ if (ra < 56) {
+ unsigned long uaa = (unsigned long)regs->regs[ra];
+ /* Set bus Address. */
+ info.si_addr = (unsigned char __user *)uaa;
+ }
+
+ unaligned_fixup_count++;
+
+ trace_unhandled_signal("unaligned fixup trap", regs,
+ (unsigned long)info.si_addr, SIGBUS);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+#ifdef __LITTLE_ENDIAN
+#define UA_FIXUP_ADDR_DELTA 1
+#define UA_FIXUP_BFEXT_START(_B_) 0
+#define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1)
+#else /* __BIG_ENDIAN */
+#define UA_FIXUP_ADDR_DELTA -1
+#define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_))
+#define UA_FIXUP_BFEXT_END(_B_) 63
+#endif /* __LITTLE_ENDIAN */
+
+
+
+ if ((ra != rb) && (rd != TREG_SP) && !alias &&
+ !y1_br && !y1_lr && !x1_add) {
+ /*
+ * Simple case: ra != rb and no register alias found,
+ * and no branch or link. This will be the majority.
+ * We can do a little better for simplae case than the
+ * generic scheme below.
+ */
+ if (!load_n_store) {
+ /*
+ * Simple store: ra != rb, no need for scratch register.
+ * Just store and rotate to right bytewise.
+ */
+#ifdef __BIG_ENDIAN
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, load_store_size - 1) |
+ jit_x1_fnop();
+#endif /* __BIG_ENDIAN */
+ for (k = 0; k < load_store_size; k++) {
+ /* Store a byte. */
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(ra, rb,
+ UA_FIXUP_ADDR_DELTA);
+ }
+#ifdef __BIG_ENDIAN
+ frag.insn[n] = jit_x1_addi(ra, ra, 1);
+#else
+ frag.insn[n] = jit_x1_addi(ra, ra,
+ -1 * load_store_size);
+#endif /* __LITTLE_ENDIAN */
+
+ if (load_store_size == 8) {
+ frag.insn[n] |= jit_x0_fnop();
+ } else if (load_store_size == 4) {
+ frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
+ } else { /* = 2 */
+ frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
+ }
+ n++;
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+ } else {
+ if (rd == ra) {
+ /* Use two clobber registers: clob1/2. */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -16) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 7) |
+ jit_x1_st_add(TREG_SP, clob1, -8);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, ra, 0) |
+ jit_x1_st(TREG_SP, clob2);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(rd, ra);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ /*
+ * Note: we must make sure that rd must not
+ * be sp. Recover clob1/2 from stack.
+ */
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, clob2) |
+ jit_x1_ld_add(clob2, TREG_SP, 8);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob1, TREG_SP, 16);
+ } else {
+ /* Use one clobber register: clob1 only. */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -16) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 7) |
+ jit_x1_st(TREG_SP, clob1);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(rd, ra);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ /*
+ * Note: we must make sure that rd must not
+ * be sp. Recover clob1 from stack.
+ */
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, ra) |
+ jit_x1_ld_add(clob1, TREG_SP, 16);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+ /*
+ * For non 8-byte load, extract corresponding bytes and
+ * signed extension.
+ */
+ if (load_store_size == 4) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ } else if (load_store_size == 2) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ }
+
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_iret();
+ }
+ } else if (!load_n_store) {
+
+ /*
+ * Generic memory store cases: use 3 clobber registers.
+ *
+ * Alloc space for saveing clob2,1,3 on user's stack.
+ * register clob3 points to where clob2 saved, followed by
+ * clob1 and 3 from high to low memory.
+ */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -32) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob3, TREG_SP, 16) |
+ jit_x1_st_add(TREG_SP, clob3, 8);
+#ifdef __LITTLE_ENDIAN
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 0) |
+ jit_x1_st_add(TREG_SP, clob1, 8);
+#else
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, load_store_size - 1) |
+ jit_x1_st_add(TREG_SP, clob1, 8);
+#endif
+ if (load_store_size == 8) {
+ /*
+ * We save one byte a time, not for fast, but compact
+ * code. After each store, data source register shift
+ * right one byte. unchanged after 8 stores.
+ */
+ frag.insn[n++] =
+ jit_x0_addi(clob2, TREG_ZERO, 7) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, clob2, -1) |
+ jit_x1_bnezt(clob2, -1);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ } else if (load_store_size == 4) {
+ frag.insn[n++] =
+ jit_x0_addi(clob2, TREG_ZERO, 3) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, clob2, -1) |
+ jit_x1_bnezt(clob2, -1);
+ /*
+ * same as 8-byte case, but need shift another 4
+ * byte to recover rb for 4-byte store.
+ */
+ frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ } else { /* =2 */
+ frag.insn[n++] =
+ jit_x0_addi(clob2, rb, 0) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ for (k = 0; k < 2; k++) {
+ frag.insn[n++] =
+ jit_x0_shrui(rb, rb, 8) |
+ jit_x1_st1_add(clob1, rb,
+ UA_FIXUP_ADDR_DELTA);
+ }
+ frag.insn[n++] =
+ jit_x0_addi(rb, clob2, 0) |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+
+ if (y1_lr) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mfspr(y1_lr_reg,
+ SPR_EX_CONTEXT_0_0);
+ }
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
+ clob2);
+ }
+ if (x1_add) {
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, x1_add_imm8) |
+ jit_x1_ld_add(clob2, clob3, -8);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob2, clob3, -8);
+ }
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob1, clob3, -8);
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+
+ } else {
+ /*
+ * Generic memory load cases.
+ *
+ * Alloc space for saveing clob1,2,3 on user's stack.
+ * register clob3 points to where clob1 saved, followed
+ * by clob2 and 3 from high to low memory.
+ */
+
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -32) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob3, TREG_SP, 16) |
+ jit_x1_st_add(TREG_SP, clob3, 8);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, ra, 0) |
+ jit_x1_st_add(TREG_SP, clob2, 8);
+
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_addi(clob1, y1_br_reg, 0) |
+ jit_x1_st_add(TREG_SP, clob1, 16);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_st_add(TREG_SP, clob1, 16);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+
+ if (y1_lr) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mfspr(y1_lr_reg,
+ SPR_EX_CONTEXT_0_0);
+ }
+
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
+ clob1);
+ }
+
+ frag.insn[n++] =
+ jit_x0_addi(clob1, clob2, 7) |
+ jit_x1_ldna(rd, clob2);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, clob2) |
+ jit_x1_ld_add(clob1, clob3, -8);
+ if (x1_add) {
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, x1_add_imm8) |
+ jit_x1_ld_add(clob2, clob3, -8);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob2, clob3, -8);
+ }
+
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld(clob3, clob3);
+
+ if (load_store_size == 4) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ } else if (load_store_size == 2) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ }
+
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+ }
+
+ /* Max JIT bundle count is 14. */
+ WARN_ON(n > 14);
+
+ if (!unexpected) {
+ int status = 0;
+ int idx = (regs->pc >> 3) &
+ ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
+
+ frag.pc = regs->pc;
+ frag.bundle = bundle;
+
+ if (unaligned_printk) {
+ pr_info("%s/%d, Unalign fixup: pc=%lx "
+ "bundle=%lx %d %d %d %d %d %d %d %d.",
+ current->comm, current->pid,
+ (unsigned long)frag.pc,
+ (unsigned long)frag.bundle,
+ (int)alias, (int)rd, (int)ra,
+ (int)rb, (int)bundle_2_enable,
+ (int)y1_lr, (int)y1_br, (int)x1_add);
+
+ for (k = 0; k < n; k += 2)
+ pr_info("[%d] %016llx %016llx", k,
+ (unsigned long long)frag.insn[k],
+ (unsigned long long)frag.insn[k+1]);
+ }
+
+ /* Swap bundle byte order for big endian sys. */
+#ifdef __BIG_ENDIAN
+ frag.bundle = GX_INSN_BSWAP(frag.bundle);
+ for (k = 0; k < n; k++)
+ frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
+#endif /* __BIG_ENDIAN */
+
+ status = copy_to_user((void __user *)&jit_code_area[idx],
+ &frag, sizeof(frag));
+ if (status) {
+ /* Fail to copy JIT into user land. send SIGSEGV. */
+ siginfo_t info = {
+ .si_signo = SIGSEGV,
+ .si_code = SEGV_MAPERR,
+ .si_addr = (void __user *)&jit_code_area[idx]
+ };
+
+ pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
+ current->pid, current->comm,
+ (unsigned long long)&jit_code_area[idx]);
+
+ trace_unhandled_signal("segfault in unalign fixup",
+ regs,
+ (unsigned long)info.si_addr,
+ SIGSEGV);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+
+ /* Do a cheaper increment, not accurate. */
+ unaligned_fixup_count++;
+ __flush_icache_range((unsigned long)&jit_code_area[idx],
+ (unsigned long)&jit_code_area[idx] +
+ sizeof(frag));
+
+ /* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/
+ __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
+ __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
+
+ /* Modify pc at the start of new JIT. */
+ regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
+ /* Set ICS in SPR_EX_CONTEXT_K_1. */
+ regs->ex1 = PL_ICS_EX1(USER_PL, 1);
+ }
+}
+
+
+/*
+ * C function to generate unalign data JIT. Called from unalign data
+ * interrupt handler.
+ *
+ * First check if unalign fix is disabled or exception did not not come from
+ * user space or sp register points to unalign address, if true, generate a
+ * SIGBUS. Then map a page into user space as JIT area if it is not mapped
+ * yet. Genenerate JIT code by calling jit_bundle_gen(). After that return
+ * back to exception handler.
+ *
+ * The exception handler will "iret" to new generated JIT code after
+ * restoring caller saved registers. In theory, the JIT code will perform
+ * another "iret" to resume user's program.
+ */
+
+void do_unaligned(struct pt_regs *regs, int vecnum)
+{
+ tilegx_bundle_bits __user *pc;
+ tilegx_bundle_bits bundle;
+ struct thread_info *info = current_thread_info();
+ int align_ctl;
+
+ /* Checks the per-process unaligned JIT flags */
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
+
+ /* Enable iterrupt in order to access user land. */
+ local_irq_enable();
+
+ /*
+ * The fault came from kernel space. Two choices:
+ * (a) unaligned_fixup < 1, we will first call get/put_user fixup
+ * to return -EFAULT. If no fixup, simply panic the kernel.
+ * (b) unaligned_fixup >=1, we will try to fix the unaligned access
+ * if it was triggered by get_user/put_user() macros. Panic the
+ * kernel if it is not fixable.
+ */
+
+ if (EX1_PL(regs->ex1) != USER_PL) {
+
+ if (align_ctl < 1) {
+ unaligned_fixup_count++;
+ /* If exception came from kernel, try fix it up. */
+ if (fixup_exception(regs)) {
+ if (unaligned_printk)
+ pr_info("Unalign fixup: %d %llx @%llx",
+ (int)unaligned_fixup,
+ (unsigned long long)regs->ex1,
+ (unsigned long long)regs->pc);
+ return;
+ }
+ /* Not fixable. Go panic. */
+ panic("Unalign exception in Kernel. pc=%lx",
+ regs->pc);
+ return;
+ } else {
+ /*
+ * Try to fix the exception. If we can't, panic the
+ * kernel.
+ */
+ bundle = GX_INSN_BSWAP(
+ *((tilegx_bundle_bits *)(regs->pc)));
+ jit_bundle_gen(regs, bundle, align_ctl);
+ return;
+ }
+ }
+
+ /*
+ * Fault came from user with ICS or stack is not aligned.
+ * If so, we will trigger SIGBUS.
+ */
+ if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
+ siginfo_t info = {
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
+ .si_addr = (unsigned char __user *)0
+ };
+
+ if (unaligned_printk)
+ pr_info("Unalign fixup: %d %llx @%llx",
+ (int)unaligned_fixup,
+ (unsigned long long)regs->ex1,
+ (unsigned long long)regs->pc);
+
+ unaligned_fixup_count++;
+
+ trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+
+ /* Read the bundle casued the exception! */
+ pc = (tilegx_bundle_bits __user *)(regs->pc);
+ if (get_user(bundle, pc) != 0) {
+ /* Probably never be here since pc is valid user address.*/
+ siginfo_t info = {
+ .si_signo = SIGSEGV,
+ .si_code = SEGV_MAPERR,
+ .si_addr = (void __user *)pc
+ };
+ pr_err("Couldn't read instruction at %p trying to step\n", pc);
+ trace_unhandled_signal("segfault in unalign fixup", regs,
+ (unsigned long)info.si_addr, SIGSEGV);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+ if (!info->unalign_jit_base) {
+ void __user *user_page;
+
+ /*
+ * Allocate a page in userland.
+ * For 64-bit processes we try to place the mapping far
+ * from anything else that might be going on (specifically
+ * 64 GB below the top of the user address space). If it
+ * happens not to be possible to put it there, it's OK;
+ * the kernel will choose another location and we'll
+ * remember it for later.
+ */
+ if (is_compat_task())
+ user_page = NULL;
+ else
+ user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
+ (current->pid << PAGE_SHIFT);
+
+ user_page = (void __user *) vm_mmap(NULL,
+ (unsigned long)user_page,
+ PAGE_SIZE,
+ PROT_EXEC | PROT_READ |
+ PROT_WRITE,
+#ifdef CONFIG_HOMECACHE
+ MAP_CACHE_HOME_TASK |
+#endif
+ MAP_PRIVATE |
+ MAP_ANONYMOUS,
+ 0);
+
+ if (IS_ERR((void __force *)user_page)) {
+ pr_err("Out of kernel pages trying do_mmap.\n");
+ return;
+ }
+
+ /* Save the address in the thread_info struct */
+ info->unalign_jit_base = user_page;
+ if (unaligned_printk)
+ pr_info("Unalign bundle: %d:%d, allocate page @%llx",
+ raw_smp_processor_id(), current->pid,
+ (unsigned long long)user_page);
+ }
+
+ /* Generate unalign JIT */
+ jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
+}
+
+#endif /* __tilegx__ */
diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c
new file mode 100644
index 00000000000..5af8debc6a7
--- /dev/null
+++ b/arch/tile/kernel/usb.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Register the Tile-Gx USB interfaces as platform devices.
+ *
+ * The actual USB driver is just some glue (in
+ * drivers/usb/host/[eo]hci-tilegx.c) which makes the registers available
+ * to the standard kernel EHCI and OHCI drivers.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/usb/tilegx.h>
+#include <linux/types.h>
+
+static u64 ehci_dmamask = DMA_BIT_MASK(32);
+
+#define USB_HOST_DEF(unit, type, dmamask) \
+ static struct \
+ tilegx_usb_platform_data tilegx_usb_platform_data_ ## type ## \
+ hci ## unit = { \
+ .dev_index = unit, \
+ }; \
+ \
+ static struct platform_device tilegx_usb_ ## type ## hci ## unit = { \
+ .name = "tilegx-" #type "hci", \
+ .id = unit, \
+ .dev = { \
+ .dma_mask = dmamask, \
+ .coherent_dma_mask = DMA_BIT_MASK(32), \
+ .platform_data = \
+ &tilegx_usb_platform_data_ ## type ## hci ## \
+ unit, \
+ }, \
+ };
+
+USB_HOST_DEF(0, e, &ehci_dmamask)
+USB_HOST_DEF(0, o, NULL)
+USB_HOST_DEF(1, e, &ehci_dmamask)
+USB_HOST_DEF(1, o, NULL)
+
+#undef USB_HOST_DEF
+
+static struct platform_device *tilegx_usb_devices[] __initdata = {
+ &tilegx_usb_ehci0,
+ &tilegx_usb_ehci1,
+ &tilegx_usb_ohci0,
+ &tilegx_usb_ohci1,
+};
+
+/** Add our set of possible USB devices. */
+static int __init tilegx_usb_init(void)
+{
+ platform_add_devices(tilegx_usb_devices,
+ ARRAY_SIZE(tilegx_usb_devices));
+
+ return 0;
+}
+arch_initcall(tilegx_usb_init);
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
new file mode 100644
index 00000000000..1533af24106
--- /dev/null
+++ b/arch/tile/kernel/vdso.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#include <asm/vdso.h>
+#include <asm/mman.h>
+#include <asm/sections.h>
+
+#include <arch/sim.h>
+
+/* The alignment of the vDSO. */
+#define VDSO_ALIGNMENT PAGE_SIZE
+
+
+static unsigned int vdso_pages;
+static struct page **vdso_pagelist;
+
+#ifdef CONFIG_COMPAT
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+static int vdso_ready;
+
+/*
+ * The vdso data page.
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static unsigned int __read_mostly vdso_enabled = 1;
+
+static struct page **vdso_setup(void *vdso_kbase, unsigned int pages)
+{
+ int i;
+ struct page **pagelist;
+
+ pagelist = kzalloc(sizeof(struct page *) * (pages + 1), GFP_KERNEL);
+ BUG_ON(pagelist == NULL);
+ for (i = 0; i < pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ pagelist[i] = pg;
+ }
+ pagelist[pages - 1] = virt_to_page(vdso_data);
+ pagelist[pages] = NULL;
+
+ return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+ int data_pages = sizeof(vdso_data_store) >> PAGE_SHIFT;
+
+ /*
+ * We can disable vDSO support generally, but we need to retain
+ * one page to support the two-bundle (16-byte) rt_sigreturn path.
+ */
+ if (!vdso_enabled) {
+ size_t offset = (unsigned long)&__vdso_rt_sigreturn;
+ static struct page *sigret_page;
+ sigret_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ BUG_ON(sigret_page == NULL);
+ vdso_pagelist = &sigret_page;
+ vdso_pages = 1;
+ BUG_ON(offset >= PAGE_SIZE);
+ memcpy(page_address(sigret_page) + offset,
+ vdso_start + offset, 16);
+#ifdef CONFIG_COMPAT
+ vdso32_pages = vdso_pages;
+ vdso32_pagelist = vdso_pagelist;
+#endif
+ vdso_ready = 1;
+ return 0;
+ }
+
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+ vdso_pages += data_pages;
+ vdso_pagelist = vdso_setup(vdso_start, vdso_pages);
+
+#ifdef CONFIG_COMPAT
+ vdso32_pages = (vdso32_end - vdso32_start) >> PAGE_SHIFT;
+ vdso32_pages += data_pages;
+ vdso32_pagelist = vdso_setup(vdso32_start, vdso32_pages);
+#endif
+
+ smp_wmb();
+ vdso_ready = 1;
+
+ return 0;
+}
+arch_initcall(vdso_init);
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == VDSO_BASE)
+ return "[vdso]";
+#ifndef __tilegx__
+ if (vma->vm_start == MEM_USER_INTRPT)
+ return "[intrpt]";
+#endif
+ return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long address)
+{
+ return 0;
+}
+
+int in_gate_area_no_mm(unsigned long address)
+{
+ return 0;
+}
+
+int setup_vdso_pages(void)
+{
+ struct page **pagelist;
+ unsigned long pages;
+ struct mm_struct *mm = current->mm;
+ unsigned long vdso_base = 0;
+ int retval = 0;
+
+ if (!vdso_ready)
+ return 0;
+
+ mm->context.vdso_base = 0;
+
+ pagelist = vdso_pagelist;
+ pages = vdso_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ pagelist = vdso32_pagelist;
+ pages = vdso32_pages;
+ }
+#endif
+
+ /*
+ * vDSO has a problem and was disabled, just don't "enable" it for the
+ * process.
+ */
+ if (pages == 0)
+ return 0;
+
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ (pages << PAGE_SHIFT) +
+ ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
+ 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ retval = vdso_base;
+ return retval;
+ }
+
+ /* Add required alignment. */
+ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
+
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ mm->context.vdso_base = vdso_base;
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though
+ */
+ retval = install_special_mapping(mm, vdso_base,
+ pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+ pagelist);
+ if (retval)
+ mm->context.vdso_base = 0;
+
+ return retval;
+}
+
+static __init int vdso_func(char *s)
+{
+ return kstrtouint(s, 0, &vdso_enabled);
+}
+__setup("vdso=", vdso_func);
diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile
new file mode 100644
index 00000000000..a025f63d54c
--- /dev/null
+++ b/arch/tile/kernel/vdso/Makefile
@@ -0,0 +1,118 @@
+# Symbols present in the vdso
+vdso-syms = rt_sigreturn gettimeofday
+
+# Files to link into the vdso
+obj-vdso = $(patsubst %, v%.o, $(vdso-syms))
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+# vdso32 is only for tilegx -m32 compat task.
+VDSO32-$(CONFIG_COMPAT) := y
+
+obj-y += vdso.o
+obj-$(VDSO32-y) += vdso32.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+CFLAGS_REMOVE_vdso.o = -pg
+CFLAGS_REMOVE_vdso32.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn32.o = -pg
+CFLAGS_REMOVE_vgettimeofday.o = -pg
+CFLAGS_REMOVE_vgettimeofday32.o = -pg
+
+ifdef CONFIG_FEEDBACK_COLLECT
+# vDSO code runs in userspace, not collecting feedback data.
+CFLAGS_REMOVE_vdso.o = -ffeedback-generate
+CFLAGS_REMOVE_vdso32.o = -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn.o = -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn32.o = -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday.o = -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday32.o = -ffeedback-generate
+endif
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency
+$(obj)/vdso.o: $(obj)/vdso.so
+
+# link rule for the .so file, .lds has to be first
+SYSCFLAGS_vdso.so.dbg = $(c_flags)
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+ $(call if_changed,vdsold)
+
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO. With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vdso-syms.o
+$(obj)/built-in.o: $(obj)/vdso-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+SYSCFLAGS_vdso_syms.o = -r
+$(obj)/vdso-syms.o: $(src)/vdso.lds $(obj)/vrt_sigreturn.o FORCE
+ $(call if_changed,vdsold)
+
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions.
+# Make sure only to export the intended __vdso_xxx symbol offsets.
+quiet_cmd_vdsold = VDSOLD $@
+ cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \
+ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \
+ $(CROSS_COMPILE)objcopy \
+ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
+vdso32_install: vdso32.so
+
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_32 += -m32 -s
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 += -m32 -fPIC -shared
+
+obj-vdso32 = $(patsubst %, v%32.o, $(vdso-syms))
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+targets += $(obj-vdso32) vdso32.so vdso32.so.dbg
+
+$(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S
+ $(call if_changed,as_o_S)
+
+# Force dependency
+$(obj)/vdso32.o: $(obj)/vdso32.so
+
+SYSCFLAGS_vdso32.so.dbg = -m32 -shared -s -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/vdso32.so.dbg: $(src)/vdso.lds $(obj-vdso32)
+ $(call if_changed,vdsold)
diff --git a/arch/tile/kernel/vdso/vdso.S b/arch/tile/kernel/vdso/vdso.S
new file mode 100644
index 00000000000..3467adb4163
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .global vdso_start, vdso_end
+ .align PAGE_SIZE
+vdso_start:
+ .incbin "arch/tile/kernel/vdso/vdso.so"
+ .align PAGE_SIZE
+vdso_end:
+
+ .previous
diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S
new file mode 100644
index 00000000000..041cd6c39c8
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso.lds.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#define VDSO_VERSION_STRING LINUX_2.6
+
+
+OUTPUT_ARCH(tile)
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__vdso_rt_sigreturn);
+
+
+SECTIONS
+{
+ . = SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+
+ /*
+ * This linker script is used both with -r and with -shared.
+ * For the layouts to match, we need to skip more than enough
+ * space for the dynamic symbol table et al. If this amount
+ * is insufficient, ld -shared will barf. Just increase it here.
+ */
+ . = 0x1000;
+ .text : { *(.text .text.*) } :text
+
+ .data : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ }
+}
+
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __vdso_rt_sigreturn;
+ __vdso_gettimeofday;
+ gettimeofday;
+ local:*;
+ };
+}
diff --git a/arch/tile/kernel/vdso/vdso32.S b/arch/tile/kernel/vdso/vdso32.S
new file mode 100644
index 00000000000..1d1ac3257e1
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso32.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .global vdso32_start, vdso32_end
+ .align PAGE_SIZE
+vdso32_start:
+ .incbin "arch/tile/kernel/vdso/vdso32.so"
+ .align PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
new file mode 100644
index 00000000000..51ec8e46f5f
--- /dev/null
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#define VDSO_BUILD /* avoid some shift warnings for -m32 in <asm/page.h> */
+#include <linux/time.h>
+#include <asm/timex.h>
+#include <asm/vdso.h>
+
+#if CHIP_HAS_SPLIT_CYCLE()
+static inline cycles_t get_cycles_inline(void)
+{
+ unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
+ unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
+ unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+
+ while (unlikely(high != high2)) {
+ low = __insn_mfspr(SPR_CYCLE_LOW);
+ high = high2;
+ high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+ }
+
+ return (((cycles_t)high) << 32) | low;
+}
+#define get_cycles get_cycles_inline
+#endif
+
+/*
+ * Find out the vDSO data page address in the process address space.
+ */
+inline unsigned long get_datapage(void)
+{
+ unsigned long ret;
+
+ /* vdso data page located in the 2nd vDSO page. */
+ asm volatile ("lnk %0" : "=r"(ret));
+ ret &= ~(PAGE_SIZE - 1);
+ ret += PAGE_SIZE;
+
+ return ret;
+}
+
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ cycles_t cycles;
+ unsigned long count, sec, ns;
+ volatile struct vdso_data *vdso_data;
+
+ vdso_data = (struct vdso_data *)get_datapage();
+ /* The use of the timezone is obsolete, normally tz is NULL. */
+ if (unlikely(tz != NULL)) {
+ while (1) {
+ /* Spin until the update finish. */
+ count = vdso_data->tz_update_count;
+ if (count & 1)
+ continue;
+
+ tz->tz_minuteswest = vdso_data->tz_minuteswest;
+ tz->tz_dsttime = vdso_data->tz_dsttime;
+
+ /* Check whether updated, read again if so. */
+ if (count == vdso_data->tz_update_count)
+ break;
+ }
+ }
+
+ if (unlikely(tv == NULL))
+ return 0;
+
+ while (1) {
+ /* Spin until the update finish. */
+ count = vdso_data->tb_update_count;
+ if (count & 1)
+ continue;
+
+ cycles = (get_cycles() - vdso_data->xtime_tod_stamp);
+ ns = (cycles * vdso_data->mult) >> vdso_data->shift;
+ sec = vdso_data->xtime_clock_sec;
+ ns += vdso_data->xtime_clock_nsec;
+ if (ns >= NSEC_PER_SEC) {
+ ns -= NSEC_PER_SEC;
+ sec += 1;
+ }
+
+ /* Check whether updated, read again if so. */
+ if (count == vdso_data->tb_update_count)
+ break;
+ }
+
+ tv->tv_sec = sec;
+ tv->tv_usec = ns / 1000;
+
+ return 0;
+}
+
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/tile/kernel/vdso/vrt_sigreturn.S b/arch/tile/kernel/vdso/vrt_sigreturn.S
new file mode 100644
index 00000000000..6326caf4a03
--- /dev/null
+++ b/arch/tile/kernel/vdso/vrt_sigreturn.S
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <arch/abi.h>
+#include <asm/unistd.h>
+
+/*
+ * Note that libc has a copy of this function that it uses to compare
+ * against the PC when a stack backtrace ends, so if this code is
+ * changed, the libc implementation(s) should also be updated.
+ */
+ENTRY(__vdso_rt_sigreturn)
+ moveli TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn
+ swint1
+ /* We don't use ENDPROC to avoid tagging this symbol as FUNC,
+ * which confuses the perf tool.
+ */
+ END(__vdso_rt_sigreturn)
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 631f10de12f..f1819423ffc 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -5,7 +5,7 @@
#include <hv/hypervisor.h>
/* Text loads starting from the supervisor interrupt vector address. */
-#define TEXT_OFFSET MEM_SV_INTRPT
+#define TEXT_OFFSET MEM_SV_START
OUTPUT_ARCH(tile)
ENTRY(_start)
@@ -13,7 +13,7 @@ jiffies = jiffies_64;
PHDRS
{
- intrpt1 PT_LOAD ;
+ intrpt PT_LOAD ;
text PT_LOAD ;
data PT_LOAD ;
}
@@ -24,23 +24,30 @@ SECTIONS
#define LOAD_OFFSET TEXT_OFFSET
/* Interrupt vectors */
- .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */
+ .intrpt (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */
{
_text = .;
- _stext = .;
- *(.intrpt1)
- } :intrpt1 =0
+ *(.intrpt)
+ } :intrpt =0
/* Hypervisor call vectors */
- #include "hvglue.lds"
+ . = ALIGN(0x10000);
+ .hvglue : AT (ADDR(.hvglue) - LOAD_OFFSET) {
+ *(.hvglue)
+ } :NONE
/* Now the real code */
. = ALIGN(0x20000);
+ _stext = .;
.text : AT (ADDR(.text) - LOAD_OFFSET) {
HEAD_TEXT
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
__fix_text_end = .; /* tile-cpack won't rearrange before this */
+ ALIGN_FUNCTION();
+ *(.hottext*)
TEXT_TEXT
*(.text.*)
*(.coldtext*)
@@ -58,27 +65,17 @@ SECTIONS
#define LOAD_OFFSET PAGE_OFFSET
. = ALIGN(PAGE_SIZE);
+ __init_begin = .;
VMLINUX_SYMBOL(_sinitdata) = .;
INIT_DATA_SECTION(16) :data =0
PERCPU_SECTION(L2_CACHE_BYTES)
. = ALIGN(PAGE_SIZE);
VMLINUX_SYMBOL(_einitdata) = .;
+ __init_end = .;
_sdata = .; /* Start of data section */
-
RO_DATA_SECTION(PAGE_SIZE)
-
- /* initially writeable, then read-only */
- . = ALIGN(PAGE_SIZE);
- __w1data_begin = .;
- .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
- VMLINUX_SYMBOL(__w1data_begin) = .;
- *(.w1data)
- VMLINUX_SYMBOL(__w1data_end) = .;
- }
-
RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-
_edata = .;
EXCEPTION_TABLE(L2_CACHE_BYTES)
diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig
index 669fcdba31e..2298cb1daff 100644
--- a/arch/tile/kvm/Kconfig
+++ b/arch/tile/kvm/Kconfig
@@ -18,7 +18,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM && MODULES && EXPERIMENTAL
+ depends on HAVE_KVM && MODULES
select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 0c26086ecbe..c4211cbb202 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -4,14 +4,15 @@
lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
- strchr_$(BITS).o strlen_$(BITS).o
-
-ifeq ($(CONFIG_TILEGX),y)
-lib-y += memcpy_user_64.o
-else
-lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
-endif
+ strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
+lib-$(CONFIG_TILEGX) += memcpy_user_64.o
+lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
obj-$(CONFIG_MODULES) += exports.o
+
+# The finv_buffer_remote() and copy_{to,from}_user() routines can't
+# have -pg added, since they both rely on being leaf functions.
+CFLAGS_REMOVE_cacheflush.o = -pg
+CFLAGS_REMOVE_memcpy_user_64.o = -pg
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 771b251b409..c89b211fd9e 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -18,53 +18,14 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/atomic.h>
-#include <asm/futex.h>
#include <arch/chip.h>
-/* See <asm/atomic_32.h> */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/*
- * A block of memory containing locks for atomic ops. Each instance of this
- * struct will be homed on a different CPU.
- */
-struct atomic_locks_on_cpu {
- int lock[ATOMIC_HASH_L2_SIZE];
-} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
-
-static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
-
-/* The locks we'll use until __init_atomic_per_cpu is called. */
-static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
-
-/* Hash into this vector to get a pointer to lock for the given atomic. */
-struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
- __write_once = {
- [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
-};
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* This page is remapped on startup to be hash-for-home. */
int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
-static inline int *__atomic_hashed_lock(volatile void *v)
+int *__atomic_hashed_lock(volatile void *v)
{
/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- unsigned long i =
- (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
- unsigned long n = __insn_crc32_32(0, i);
-
- /* Grab high bits for L1 index. */
- unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
- /* Grab low bits for L2 index. */
- unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
-
- return &atomic_lock_ptr[l1_index]->lock[l2_index];
-#else
/*
* Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
* Using mm works here because atomic_locks is page aligned.
@@ -73,26 +34,13 @@ static inline int *__atomic_hashed_lock(volatile void *v)
(unsigned long)atomic_locks,
2, (ATOMIC_HASH_SHIFT + 2) - 1);
return (int *)ptr;
-#endif
}
#ifdef CONFIG_SMP
/* Return whether the passed pointer is a valid atomic lock pointer. */
static int is_atomic_lock(int *p)
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- int i;
- for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-
- if (p >= &atomic_lock_ptr[i]->lock[0] &&
- p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
- return 1;
- }
- }
- return 0;
-#else
return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
-#endif
}
void __atomic_fault_unlock(int *irqlock_word)
@@ -111,33 +59,32 @@ static inline int *__atomic_setup(volatile void *v)
return __atomic_hashed_lock(v);
}
-int _atomic_xchg(atomic_t *v, int n)
+int _atomic_xchg(int *v, int n)
{
- return __atomic_xchg(&v->counter, __atomic_setup(v), n).val;
+ return __atomic_xchg(v, __atomic_setup(v), n).val;
}
EXPORT_SYMBOL(_atomic_xchg);
-int _atomic_xchg_add(atomic_t *v, int i)
+int _atomic_xchg_add(int *v, int i)
{
- return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val;
+ return __atomic_xchg_add(v, __atomic_setup(v), i).val;
}
EXPORT_SYMBOL(_atomic_xchg_add);
-int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
+int _atomic_xchg_add_unless(int *v, int a, int u)
{
/*
* Note: argument order is switched here since it is easier
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a)
- .val;
+ return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
}
EXPORT_SYMBOL(_atomic_xchg_add_unless);
-int _atomic_cmpxchg(atomic_t *v, int o, int n)
+int _atomic_cmpxchg(int *v, int o, int n)
{
- return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val;
+ return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
}
EXPORT_SYMBOL(_atomic_cmpxchg);
@@ -160,78 +107,36 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
EXPORT_SYMBOL(_atomic_xor);
-u64 _atomic64_xchg(atomic64_t *v, u64 n)
+long long _atomic64_xchg(long long *v, long long n)
{
- return __atomic64_xchg(&v->counter, __atomic_setup(v), n);
+ return __atomic64_xchg(v, __atomic_setup(v), n);
}
EXPORT_SYMBOL(_atomic64_xchg);
-u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
+long long _atomic64_xchg_add(long long *v, long long i)
{
- return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i);
+ return __atomic64_xchg_add(v, __atomic_setup(v), i);
}
EXPORT_SYMBOL(_atomic64_xchg_add);
-u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
+long long _atomic64_xchg_add_unless(long long *v, long long a, long long u)
{
/*
* Note: argument order is switched here since it is easier
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v),
- u, a);
+ return __atomic64_xchg_add_unless(v, __atomic_setup(v), u, a);
}
EXPORT_SYMBOL(_atomic64_xchg_add_unless);
-u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+long long _atomic64_cmpxchg(long long *v, long long o, long long n)
{
- return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n);
+ return __atomic64_cmpxchg(v, __atomic_setup(v), o, n);
}
EXPORT_SYMBOL(_atomic64_cmpxchg);
-static inline int *__futex_setup(int __user *v)
-{
- /*
- * Issue a prefetch to the counter to bring it into cache.
- * As for __atomic_setup, but we can't do a read into the L1
- * since it might fault; instead we do a prefetch into the L2.
- */
- __insn_prefetch(v);
- return __atomic_hashed_lock((int __force *)v);
-}
-
-struct __get_user futex_set(u32 __user *v, int i)
-{
- return __atomic_xchg((int __force *)v, __futex_setup(v), i);
-}
-
-struct __get_user futex_add(u32 __user *v, int n)
-{
- return __atomic_xchg_add((int __force *)v, __futex_setup(v), n);
-}
-
-struct __get_user futex_or(u32 __user *v, int n)
-{
- return __atomic_or((int __force *)v, __futex_setup(v), n);
-}
-
-struct __get_user futex_andn(u32 __user *v, int n)
-{
- return __atomic_andn((int __force *)v, __futex_setup(v), n);
-}
-
-struct __get_user futex_xor(u32 __user *v, int n)
-{
- return __atomic_xor((int __force *)v, __futex_setup(v), n);
-}
-
-struct __get_user futex_cmpxchg(u32 __user *v, int o, int n)
-{
- return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n);
-}
-
/*
* If any of the atomic or futex routines hit a bad address (not in
* the page tables at kernel PL) this routine is called. The futex
@@ -250,54 +155,8 @@ struct __get_user __atomic_bad_address(int __user *addr)
}
-#if CHIP_HAS_CBOX_HOME_MAP()
-static int __init noatomichash(char *str)
-{
- pr_warning("noatomichash is deprecated.\n");
- return 1;
-}
-__setup("noatomichash", noatomichash);
-#endif
-
void __init __init_atomic_per_cpu(void)
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
- unsigned int i;
- int actual_cpu;
-
- /*
- * Before this is called from setup, we just have one lock for
- * all atomic objects/operations. Here we replace the
- * elements of atomic_lock_ptr so that they point at per_cpu
- * integers. This seemingly over-complex approach stems from
- * the fact that DEFINE_PER_CPU defines an entry for each cpu
- * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
- * for efficient hashing of atomics to their locks we want a
- * compile time constant power of 2 for the size of this
- * table, so we use ATOMIC_HASH_SIZE.
- *
- * Here we populate atomic_lock_ptr from the per cpu
- * atomic_lock_pool, interspersing by actual cpu so that
- * subsequent elements are homed on consecutive cpus.
- */
-
- actual_cpu = cpumask_first(cpu_possible_mask);
-
- for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
- /*
- * Preincrement to slightly bias against using cpu 0,
- * which has plenty of stuff homed on it already.
- */
- actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
- if (actual_cpu >= nr_cpu_ids)
- actual_cpu = cpumask_first(cpu_possible_mask);
-
- atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
- }
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* Validate power-of-two and "bigger than cpus" assumption */
BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
@@ -321,9 +180,4 @@ void __init __init_atomic_per_cpu(void)
* That should not produce more indices than ATOMIC_HASH_SIZE.
*/
BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
-
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
- /* The futex code makes this assumption, so we validate it here. */
- BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 30638042691..6bda3132cd6 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -164,6 +164,7 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
STD_ENDPROC(__atomic\name)
.ifc \bitwidth,32
.pushsection __ex_table,"a"
+ .align 4
.word 1b, __atomic\name
.word 2b, __atomic\name
.word __atomic\name, __atomic_bad_address
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 8928aace7a6..9c0ec22009a 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -12,6 +12,7 @@
* more details.
*/
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <arch/icache.h>
@@ -35,11 +36,26 @@ static inline void force_load(char *p)
* core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
* until the memory controller holds the flushed values.
*/
-void finv_buffer_remote(void *buffer, size_t size, int hfh)
+void __attribute__((optimize("omit-frame-pointer")))
+finv_buffer_remote(void *buffer, size_t size, int hfh)
{
char *p, *base;
size_t step_size, load_count;
+
+ /*
+ * On TILEPro the striping granularity is a fixed 8KB; on
+ * TILE-Gx it is configurable, and we rely on the fact that
+ * the hypervisor always configures maximum striping, so that
+ * bits 9 and 10 of the PA are part of the stripe function, so
+ * every 512 bytes we hit a striping boundary.
+ *
+ */
+#ifdef __tilegx__
+ const unsigned long STRIPE_WIDTH = 512;
+#else
const unsigned long STRIPE_WIDTH = 8192;
+#endif
+
#ifdef __tilegx__
/*
* On TILE-Gx, we must disable the dstream prefetcher before doing
@@ -74,7 +90,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
* memory, that one load would be sufficient, but since we may
* be, we also need to back up to the last load issued to
* another memory controller, which would be the point where
- * we crossed an 8KB boundary (the granularity of striping
+ * we crossed a "striping" boundary (the granularity of striping
* across memory controllers). Keep backing up and doing this
* until we are before the beginning of the buffer, or have
* hit all the controllers.
@@ -88,12 +104,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
* every cache line on a full memory stripe on each
* controller" that we simply do that, to simplify the logic.
*
- * FIXME: See bug 9535 for some issues with this code.
+ * On TILE-Gx the hash-for-home function is much more complex,
+ * with the upshot being we can't readily guarantee we have
+ * hit both entries in the 128-entry AMT that were hit by any
+ * load in the entire range, so we just re-load them all.
+ * With larger buffers, we may want to consider using a hypervisor
+ * trap to issue loads directly to each hash-for-home tile for
+ * each controller (doing it from Linux would trash the TLB).
*/
if (hfh) {
step_size = L2_CACHE_BYTES;
+#ifdef __tilegx__
+ load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
+#else
load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
(1 << CHIP_LOG_NUM_MSHIMS());
+#endif
} else {
step_size = STRIPE_WIDTH;
load_count = (1 << CHIP_LOG_NUM_MSHIMS());
@@ -109,7 +135,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
/* Figure out how far back we need to go. */
base = p - (step_size * (load_count - 2));
- if ((long)base < (long)buffer)
+ if ((unsigned long)base < (unsigned long)buffer)
base = buffer;
/*
@@ -122,18 +148,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
force_load(p);
/*
- * Repeat, but with inv's instead of loads, to get rid of the
+ * Repeat, but with finv's instead of loads, to get rid of the
* data we just loaded into our own cache and the old home L3.
- * No need to unroll since inv's don't target a register.
+ * No need to unroll since finv's don't target a register.
+ * The finv's are guaranteed not to actually flush the data in
+ * the buffer back to their home, since we just read it, so the
+ * lines are clean in cache; we will only invalidate those lines.
*/
p = (char *)buffer + size - 1;
- __insn_inv(p);
+ __insn_finv(p);
p -= step_size;
p = (char *)((unsigned long)p | (step_size - 1));
for (; p >= base; p -= step_size)
- __insn_inv(p);
+ __insn_finv(p);
- /* Wait for the load+inv's (and thus finvs) to have completed. */
+ /* Wait for these finv's (and thus the first finvs) to be done. */
__insn_mf();
#ifdef __tilegx__
@@ -141,3 +170,4 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
__insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
#endif
}
+EXPORT_SYMBOL_GPL(finv_buffer_remote);
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
index e4bab5bd3f3..c3ca3e64d9d 100644
--- a/arch/tile/lib/checksum.c
+++ b/arch/tile/lib/checksum.c
@@ -16,19 +16,6 @@
#include <net/checksum.h>
#include <linux/module.h>
-static inline unsigned int longto16(unsigned long x)
-{
- unsigned long ret;
-#ifdef __tilegx__
- ret = __insn_v2sadu(x, 0);
- ret = __insn_v2sadu(ret, 0);
-#else
- ret = __insn_sadh_u(x, 0);
- ret = __insn_sadh_u(ret, 0);
-#endif
- return ret;
-}
-
__wsum do_csum(const unsigned char *buff, int len)
{
int odd, count;
@@ -94,7 +81,7 @@ __wsum do_csum(const unsigned char *buff, int len)
}
if (len & 1)
result += *buff;
- result = longto16(result);
+ result = csum_long(result);
if (odd)
result = swab16(result);
out:
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
index fdc403614d1..75947edccb2 100644
--- a/arch/tile/lib/cpumask.c
+++ b/arch/tile/lib/cpumask.c
@@ -16,6 +16,7 @@
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/smp.h>
+#include <linux/export.h>
/*
* Allow cropping out bits beyond the end of the array.
@@ -50,3 +51,4 @@ int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
} while (*bp != '\0' && *bp != '\n');
return 0;
}
+EXPORT_SYMBOL(bitmap_parselist_crop);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 2a81d32de0d..82733c87d67 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,19 +18,10 @@
/* arch/tile/lib/usercopy.S */
#include <linux/uaccess.h>
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strnlen_user_asm);
EXPORT_SYMBOL(strncpy_from_user_asm);
EXPORT_SYMBOL(clear_user_asm);
EXPORT_SYMBOL(flush_user_asm);
-EXPORT_SYMBOL(inv_user_asm);
EXPORT_SYMBOL(finv_user_asm);
/* arch/tile/kernel/entry.S */
@@ -42,6 +33,12 @@ EXPORT_SYMBOL(dump_stack);
/* arch/tile/kernel/head.S */
EXPORT_SYMBOL(empty_zero_page);
+#ifdef CONFIG_FUNCTION_TRACER
+/* arch/tile/kernel/mcount_64.S */
+#include <asm/ftrace.h>
+EXPORT_SYMBOL(__mcount);
+#endif /* CONFIG_FUNCTION_TRACER */
+
/* arch/tile/lib/, various memcpy files */
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__copy_to_user_inatomic);
@@ -63,6 +60,8 @@ EXPORT_SYMBOL(hv_dev_poll_cancel);
EXPORT_SYMBOL(hv_dev_close);
EXPORT_SYMBOL(hv_sysconf);
EXPORT_SYMBOL(hv_confstr);
+EXPORT_SYMBOL(hv_get_rtc);
+EXPORT_SYMBOL(hv_set_rtc);
/* libgcc.a */
uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
@@ -90,4 +89,6 @@ uint64_t __ashrdi3(uint64_t, unsigned int);
EXPORT_SYMBOL(__ashrdi3);
uint64_t __ashldi3(uint64_t, unsigned int);
EXPORT_SYMBOL(__ashldi3);
+int __ffsdi2(uint64_t);
+EXPORT_SYMBOL(__ffsdi2);
#endif
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
index 84fdc8d8e73..f8196b3a950 100644
--- a/arch/tile/lib/memchr_64.c
+++ b/arch/tile/lib/memchr_64.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
+#include "string-endian.h"
void *memchr(const void *s, int c, size_t n)
{
@@ -35,15 +36,12 @@ void *memchr(const void *s, int c, size_t n)
p = (const uint64_t *)(s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- goal = 0x0101010101010101ULL * (uint8_t) c;
+ goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
* will not match goal.
- *
- * Note that this shift count expression works because we know
- * shift counts are taken mod 64.
*/
- before_mask = (1ULL << (s_int << 3)) - 1;
+ before_mask = MASK(s_int);
v = (*p | before_mask) ^ (goal & before_mask);
/* Compute the address of the last byte. */
@@ -65,7 +63,7 @@ void *memchr(const void *s, int c, size_t n)
/* We found a match, but it might be in a byte past the end
* of the array.
*/
- ret = ((char *)p) + (__insn_ctz(bits) >> 3);
+ ret = ((char *)p) + (CFZ(bits) >> 3);
return (ret <= last_byte_ptr) ? ret : NULL;
}
EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 2a419a6122d..a2771ae5da5 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -22,14 +22,6 @@
#include <linux/linkage.h>
-/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-#define memcpy __memcpy_asm
-#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
-#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
-#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
-#endif
-
#define IS_MEMCPY 0
#define IS_COPY_FROM_USER 1
#define IS_COPY_FROM_USER_ZEROING 2
@@ -44,6 +36,7 @@
*/
#define EX \
.pushsection __ex_table, "a"; \
+ .align 4; \
.word 9f, memcpy_common_fixup; \
.popsection; \
9
@@ -158,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
{ addi r3, r1, 60; andi r9, r9, -64 }
-#if CHIP_HAS_WH64()
/* No need to prefetch dst, we'll just do the wh64
* right before we copy a line.
*/
-#endif
-
EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, .; move r27, lr }
@@ -171,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 }
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, . }
EX: { lw r7, r3; addi r3, r3, 64 }
-#if !CHIP_HAS_WH64()
- /* Prefetch the dest */
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- /* Use a real load to cause a TLB miss if necessary. We aren't using
- * r28, so this should be fine.
- */
-EX: { lw r28, r9; addi r9, r9, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- { prefetch r9; addi r9, r9, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- { prefetch r9; addi r9, r9, 64 }
-#endif
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bz zero, .Lbig_loop2 }
@@ -286,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 }
/* Fill second L1D line. */
EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
-#if CHIP_HAS_WH64()
/* Prepare destination line for writing. */
EX: { wh64 r9; addi r9, r9, 64 }
-#else
- /* Prefetch dest line */
- { prefetch r9; addi r9, r9, 64 }
-#endif
/* Load seven words that are L1D hits to cover wh64 L2 usage. */
/* Load the three remaining words from the last L1D line, which
@@ -330,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#if CHIP_HAS_WH64()
EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
-#else
- /* Back up the r9 to a cache line we are already storing to
- * if it gets past the end of the dest vector. Strictly speaking,
- * we don't need to back up to the start of a cache line, but it's free
- * and tidy, so why not?
- */
-EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
-#endif
/* Store second L1D line. */
EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
@@ -403,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
.Ldest_is_word_aligned:
-#if CHIP_HAS_DWORD_ALIGN()
EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
{ slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
@@ -511,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 }
/* Move r1 back to the point where it corresponds to r0. */
{ addi r1, r1, -4 }
-#else /* !CHIP_HAS_DWORD_ALIGN() */
-
- /* Compute right/left shift counts and load initial source words. */
- { andi r5, r1, -4; andi r3, r1, 3 }
-EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
-EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
-
- /* Load and store one word at a time, using shifts and ORs
- * to correct for the misaligned src.
- */
-.Lcopy_unaligned_src_loop:
- { shr r6, r6, r3; shl r8, r7, r4 }
-EX: { lw r7, r5; or r8, r8, r6; move r6, r7 }
-EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
- { addi r5, r5, 4; slti_u r8, r2, 8 }
- { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
-
- { bz r2, .Lcopy_unaligned_done }
-#endif /* !CHIP_HAS_DWORD_ALIGN() */
-
/* Fall through */
/*
@@ -614,5 +554,6 @@ memcpy_fixup_loop:
.size memcpy_common_fixup, . - memcpy_common_fixup
.section __ex_table,"a"
+ .align 4
.word .Lcfu, .Lcopy_from_user_fixup_zero_remainder
.word .Lctu, .Lcopy_to_user_fixup_done
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
index 3fab9a6a2bb..4815354b8cd 100644
--- a/arch/tile/lib/memcpy_64.c
+++ b/arch/tile/lib/memcpy_64.c
@@ -15,18 +15,20 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-#define __memcpy memcpy
/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
/* Must be 8 bytes in size. */
-#define word_t uint64_t
+#define op_t uint64_t
-#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
-#error "Assumes 64 or 128 byte line size"
+/* Threshold value for when to enter the unrolled loops. */
+#define OP_T_THRES 16
+
+#if CHIP_L2_LINE_SIZE() != 64
+#error "Assumes 64 byte line size"
#endif
/* How many cache lines ahead should we prefetch? */
-#define PREFETCH_LINES_AHEAD 3
+#define PREFETCH_LINES_AHEAD 4
/*
* Provide "base versions" of load and store for the normal code path.
@@ -52,15 +54,16 @@ void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
* macros to return a count of uncopied bytes due to mm fault.
*/
#define RETVAL 0
-int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
+int __attribute__((optimize("omit-frame-pointer")))
+USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
#endif
{
char *__restrict dst1 = (char *)dstv;
const char *__restrict src1 = (const char *)srcv;
const char *__restrict src1_end;
const char *__restrict prefetch;
- word_t *__restrict dst8; /* 8-byte pointer to destination memory. */
- word_t final; /* Final bytes to write to trailing word, if any */
+ op_t *__restrict dst8; /* 8-byte pointer to destination memory. */
+ op_t final; /* Final bytes to write to trailing word, if any */
long i;
if (n < 16) {
@@ -80,104 +83,228 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
__insn_prefetch(prefetch);
prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch > src1_end) ? prefetch : src1;
+ prefetch = (prefetch < src1_end) ? prefetch : src1;
}
/* Copy bytes until dst is word-aligned. */
- for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
+ for (; (uintptr_t)dst1 & (sizeof(op_t) - 1); n--)
ST1(dst1++, LD1(src1++));
/* 8-byte pointer to destination memory. */
- dst8 = (word_t *)dst1;
-
- if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
- /*
- * Misaligned copy. Copy 8 bytes at a time, but don't
- * bother with other fanciness.
- *
- * TODO: Consider prefetching and using wh64 as well.
- */
-
- /* Create an aligned src8. */
- const word_t *__restrict src8 =
- (const word_t *)((uintptr_t)src1 & -sizeof(word_t));
- word_t b;
-
- word_t a = LD8(src8++);
- for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
- b = LD8(src8++);
- a = __insn_dblalign(a, b, src1);
- ST8(dst8++, a);
- a = b;
+ dst8 = (op_t *)dst1;
+
+ if (__builtin_expect((uintptr_t)src1 & (sizeof(op_t) - 1), 0)) {
+ /* Unaligned copy. */
+
+ op_t tmp0 = 0, tmp1 = 0, tmp2, tmp3;
+ const op_t *src8 = (const op_t *) ((uintptr_t)src1 &
+ -sizeof(op_t));
+ const void *srci = (void *)src1;
+ int m;
+
+ m = (CHIP_L2_LINE_SIZE() << 2) -
+ (((uintptr_t)dst8) & ((CHIP_L2_LINE_SIZE() << 2) - 1));
+ m = (n < m) ? n : m;
+ m /= sizeof(op_t);
+
+ /* Copy until 'dst' is cache-line-aligned. */
+ n -= (sizeof(op_t) * m);
+
+ switch (m % 4) {
+ case 0:
+ if (__builtin_expect(!m, 0))
+ goto _M0;
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ goto _8B3;
+ case 2:
+ m += 2;
+ tmp3 = LD8(src8++);
+ tmp0 = LD8(src8++);
+ goto _8B1;
+ case 3:
+ m += 1;
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ goto _8B2;
+ case 1:
+ m--;
+ tmp0 = LD8(src8++);
+ tmp1 = LD8(src8++);
+ if (__builtin_expect(!m, 0))
+ goto _8B0;
+ }
+
+ do {
+ tmp2 = LD8(src8++);
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+_8B3:
+ tmp3 = LD8(src8++);
+ tmp1 = __insn_dblalign(tmp1, tmp2, srci);
+ ST8(dst8++, tmp1);
+_8B2:
+ tmp0 = LD8(src8++);
+ tmp2 = __insn_dblalign(tmp2, tmp3, srci);
+ ST8(dst8++, tmp2);
+_8B1:
+ tmp1 = LD8(src8++);
+ tmp3 = __insn_dblalign(tmp3, tmp0, srci);
+ ST8(dst8++, tmp3);
+ m -= 4;
+ } while (m);
+
+_8B0:
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+ src8--;
+
+_M0:
+ if (__builtin_expect(n >= CHIP_L2_LINE_SIZE(), 0)) {
+ op_t tmp4, tmp5, tmp6, tmp7, tmp8;
+
+ prefetch = ((const char *)src8) +
+ CHIP_L2_LINE_SIZE() * PREFETCH_LINES_AHEAD;
+
+ for (tmp0 = LD8(src8++); n >= CHIP_L2_LINE_SIZE();
+ n -= CHIP_L2_LINE_SIZE()) {
+ /* Prefetch and advance to next line to
+ prefetch, but don't go past the end. */
+ __insn_prefetch(prefetch);
+
+ /* Make sure prefetch got scheduled
+ earlier. */
+ __asm__ ("" : : : "memory");
+
+ prefetch += CHIP_L2_LINE_SIZE();
+ prefetch = (prefetch < src1_end) ? prefetch :
+ (const char *) src8;
+
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ tmp4 = LD8(src8++);
+ tmp5 = LD8(src8++);
+ tmp6 = LD8(src8++);
+ tmp7 = LD8(src8++);
+ tmp8 = LD8(src8++);
+
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ tmp1 = __insn_dblalign(tmp1, tmp2, srci);
+ tmp2 = __insn_dblalign(tmp2, tmp3, srci);
+ tmp3 = __insn_dblalign(tmp3, tmp4, srci);
+ tmp4 = __insn_dblalign(tmp4, tmp5, srci);
+ tmp5 = __insn_dblalign(tmp5, tmp6, srci);
+ tmp6 = __insn_dblalign(tmp6, tmp7, srci);
+ tmp7 = __insn_dblalign(tmp7, tmp8, srci);
+
+ __insn_wh64(dst8);
+
+ ST8(dst8++, tmp0);
+ ST8(dst8++, tmp1);
+ ST8(dst8++, tmp2);
+ ST8(dst8++, tmp3);
+ ST8(dst8++, tmp4);
+ ST8(dst8++, tmp5);
+ ST8(dst8++, tmp6);
+ ST8(dst8++, tmp7);
+
+ tmp0 = tmp8;
+ }
+ src8--;
+ }
+
+ /* Copy the rest 8-byte chunks. */
+ if (n >= sizeof(op_t)) {
+ tmp0 = LD8(src8++);
+ for (; n >= sizeof(op_t); n -= sizeof(op_t)) {
+ tmp1 = LD8(src8++);
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+ tmp0 = tmp1;
+ }
+ src8--;
}
if (n == 0)
return RETVAL;
- b = ((const char *)src8 <= src1_end) ? *src8 : 0;
+ tmp0 = LD8(src8++);
+ tmp1 = ((const char *)src8 <= src1_end)
+ ? LD8((op_t *)src8) : 0;
+ final = __insn_dblalign(tmp0, tmp1, srci);
- /*
- * Final source bytes to write to trailing partial
- * word, if any.
- */
- final = __insn_dblalign(a, b, src1);
} else {
/* Aligned copy. */
- const word_t* __restrict src8 = (const word_t *)src1;
+ const op_t *__restrict src8 = (const op_t *)src1;
/* src8 and dst8 are both word-aligned. */
if (n >= CHIP_L2_LINE_SIZE()) {
/* Copy until 'dst' is cache-line-aligned. */
for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
- n -= sizeof(word_t))
+ n -= sizeof(op_t))
ST8(dst8++, LD8(src8++));
for (; n >= CHIP_L2_LINE_SIZE(); ) {
- __insn_wh64(dst8);
+ op_t tmp0, tmp1, tmp2, tmp3;
+ op_t tmp4, tmp5, tmp6, tmp7;
/*
* Prefetch and advance to next line
- * to prefetch, but don't go past the end
+ * to prefetch, but don't go past the
+ * end.
*/
__insn_prefetch(prefetch);
+
+ /* Make sure prefetch got scheduled
+ earlier. */
+ __asm__ ("" : : : "memory");
+
prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch > src1_end) ? prefetch :
+ prefetch = (prefetch < src1_end) ? prefetch :
(const char *)src8;
/*
- * Copy an entire cache line. Manually
- * unrolled to avoid idiosyncracies of
- * compiler unrolling.
+ * Do all the loads before wh64. This
+ * is necessary if [src8, src8+7] and
+ * [dst8, dst8+7] share the same cache
+ * line and dst8 <= src8, as can be
+ * the case when called from memmove,
+ * or with code tested on x86 whose
+ * memcpy always works with forward
+ * copies.
*/
-#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
- COPY_WORD(0);
- COPY_WORD(1);
- COPY_WORD(2);
- COPY_WORD(3);
- COPY_WORD(4);
- COPY_WORD(5);
- COPY_WORD(6);
- COPY_WORD(7);
-#if CHIP_L2_LINE_SIZE() == 128
- COPY_WORD(8);
- COPY_WORD(9);
- COPY_WORD(10);
- COPY_WORD(11);
- COPY_WORD(12);
- COPY_WORD(13);
- COPY_WORD(14);
- COPY_WORD(15);
-#elif CHIP_L2_LINE_SIZE() != 64
-# error Fix code that assumes particular L2 cache line sizes
-#endif
+ tmp0 = LD8(src8++);
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ tmp4 = LD8(src8++);
+ tmp5 = LD8(src8++);
+ tmp6 = LD8(src8++);
+ tmp7 = LD8(src8++);
+
+ /* wh64 and wait for tmp7 load completion. */
+ __asm__ ("move %0, %0; wh64 %1\n"
+ : : "r"(tmp7), "r"(dst8));
+
+ ST8(dst8++, tmp0);
+ ST8(dst8++, tmp1);
+ ST8(dst8++, tmp2);
+ ST8(dst8++, tmp3);
+ ST8(dst8++, tmp4);
+ ST8(dst8++, tmp5);
+ ST8(dst8++, tmp6);
+ ST8(dst8++, tmp7);
- dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
- src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
+ n -= CHIP_L2_LINE_SIZE();
}
+#if CHIP_L2_LINE_SIZE() != 64
+# error "Fix code that assumes particular L2 cache line size."
+#endif
}
- for (; n >= sizeof(word_t); n -= sizeof(word_t))
+ for (; n >= sizeof(op_t); n -= sizeof(op_t))
ST8(dst8++, LD8(src8++));
if (__builtin_expect(n == 0, 1))
@@ -188,6 +315,7 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
/* n != 0 if we get here. Write out any trailing bytes. */
dst1 = (char *)dst8;
+#ifndef __BIG_ENDIAN__
if (n & 4) {
ST4((uint32_t *)dst1, final);
dst1 += 4;
@@ -202,11 +330,30 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
}
if (n)
ST1((uint8_t *)dst1, final);
+#else
+ if (n & 4) {
+ ST4((uint32_t *)dst1, final >> 32);
+ dst1 += 4;
+ }
+ else
+ {
+ final >>= 32;
+ }
+ if (n & 2) {
+ ST2((uint16_t *)dst1, final >> 16);
+ dst1 += 2;
+ }
+ else
+ {
+ final >>= 16;
+ }
+ if (n & 1)
+ ST1((uint8_t *)dst1, final >> 8);
+#endif
return RETVAL;
}
-
#ifdef USERCOPY_FUNC
#undef ST1
#undef ST2
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
deleted file mode 100644
index b2fe15e0107..00000000000
--- a/arch/tile/lib/memcpy_tile64.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/string.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <asm/fixmap.h>
-#include <asm/kmap_types.h>
-#include <asm/tlbflush.h>
-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
-
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-
-/* Defined in memcpy.S */
-extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
-extern unsigned long __copy_to_user_inatomic_asm(
- void __user *to, const void *from, unsigned long n);
-extern unsigned long __copy_from_user_inatomic_asm(
- void *to, const void __user *from, unsigned long n);
-extern unsigned long __copy_from_user_zeroing_asm(
- void *to, const void __user *from, unsigned long n);
-
-typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
-
-/* Size above which to consider TLB games for performance */
-#define LARGE_COPY_CUTOFF 2048
-
-/* Communicate to the simulator what we are trying to do. */
-#define sim_allow_multiple_caching(b) \
- __insn_mtspr(SPR_SIM_CONTROL, \
- SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
-
-/*
- * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
- *
- * We set up our own source and destination PTEs that we fully control.
- * This is the only way to guarantee that we don't race with another
- * thread that is modifying the PTE; we can't afford to try the
- * copy_{to,from}_user() technique of catching the interrupt, since
- * we must run with interrupts disabled to avoid the risk of some
- * other code seeing the incoherent data in our cache. (Recall that
- * our cache is indexed by PA, so even if the other code doesn't use
- * our kmap_atomic virtual addresses, they'll still hit in cache using
- * the normal VAs that aren't supposed to hit in cache.)
- */
-static void memcpy_multicache(void *dest, const void *source,
- pte_t dst_pte, pte_t src_pte, int len)
-{
- int idx;
- unsigned long flags, newsrc, newdst;
- pmd_t *pmdp;
- pte_t *ptep;
- int type0, type1;
- int cpu = get_cpu();
-
- /*
- * Disable interrupts so that we don't recurse into memcpy()
- * in an interrupt handler, nor accidentally reference
- * the PA of the source from an interrupt routine. Also
- * notify the simulator that we're playing games so we don't
- * generate spurious coherency warnings.
- */
- local_irq_save(flags);
- sim_allow_multiple_caching(1);
-
- /* Set up the new dest mapping */
- type0 = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
- newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
- ptep = pte_offset_kernel(pmdp, newdst);
- if (pte_val(*ptep) != pte_val(dst_pte)) {
- set_pte(ptep, dst_pte);
- local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
- }
-
- /* Set up the new source mapping */
- type1 = kmap_atomic_idx_push();
- idx += (type0 - type1);
- src_pte = hv_pte_set_nc(src_pte);
- src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
- newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
- ptep = pte_offset_kernel(pmdp, newsrc);
- __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
- local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
-
- /* Actually move the data. */
- __memcpy_asm((void *)newdst, (const void *)newsrc, len);
-
- /*
- * Remap the source as locally-cached and not OLOC'ed so that
- * we can inval without also invaling the remote cpu's cache.
- * This also avoids known errata with inv'ing cacheable oloc data.
- */
- src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
- src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
- __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
- local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
-
- /*
- * Do the actual invalidation, covering the full L2 cache line
- * at the end since __memcpy_asm() is somewhat aggressive.
- */
- __inv_buffer((void *)newsrc, len);
-
- /*
- * We're done: notify the simulator that all is back to normal,
- * and re-enable interrupts and pre-emption.
- */
- kmap_atomic_idx_pop();
- kmap_atomic_idx_pop();
- sim_allow_multiple_caching(0);
- local_irq_restore(flags);
- put_cpu();
-}
-
-/*
- * Identify large copies from remotely-cached memory, and copy them
- * via memcpy_multicache() if they look good, otherwise fall back
- * to the particular kind of copying passed as the memcpy_t function.
- */
-static unsigned long fast_copy(void *dest, const void *source, int len,
- memcpy_t func)
-{
- /*
- * Check if it's big enough to bother with. We may end up doing a
- * small copy via TLB manipulation if we're near a page boundary,
- * but presumably we'll make it up when we hit the second page.
- */
- while (len >= LARGE_COPY_CUTOFF) {
- int copy_size, bytes_left_on_page;
- pte_t *src_ptep, *dst_ptep;
- pte_t src_pte, dst_pte;
- struct page *src_page, *dst_page;
-
- /* Is the source page oloc'ed to a remote cpu? */
-retry_source:
- src_ptep = virt_to_pte(current->mm, (unsigned long)source);
- if (src_ptep == NULL)
- break;
- src_pte = *src_ptep;
- if (!hv_pte_get_present(src_pte) ||
- !hv_pte_get_readable(src_pte) ||
- hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
- break;
- if (get_remote_cache_cpu(src_pte) == smp_processor_id())
- break;
- src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
- get_page(src_page);
- if (pte_val(src_pte) != pte_val(*src_ptep)) {
- put_page(src_page);
- goto retry_source;
- }
- if (pte_huge(src_pte)) {
- /* Adjust the PTE to correspond to a small page */
- int pfn = hv_pte_get_pfn(src_pte);
- pfn += (((unsigned long)source & (HPAGE_SIZE-1))
- >> PAGE_SHIFT);
- src_pte = pfn_pte(pfn, src_pte);
- src_pte = pte_mksmall(src_pte);
- }
-
- /* Is the destination page writable? */
-retry_dest:
- dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
- if (dst_ptep == NULL) {
- put_page(src_page);
- break;
- }
- dst_pte = *dst_ptep;
- if (!hv_pte_get_present(dst_pte) ||
- !hv_pte_get_writable(dst_pte)) {
- put_page(src_page);
- break;
- }
- dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
- if (dst_page == src_page) {
- /*
- * Source and dest are on the same page; this
- * potentially exposes us to incoherence if any
- * part of src and dest overlap on a cache line.
- * Just give up rather than trying to be precise.
- */
- put_page(src_page);
- break;
- }
- get_page(dst_page);
- if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
- put_page(dst_page);
- goto retry_dest;
- }
- if (pte_huge(dst_pte)) {
- /* Adjust the PTE to correspond to a small page */
- int pfn = hv_pte_get_pfn(dst_pte);
- pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
- >> PAGE_SHIFT);
- dst_pte = pfn_pte(pfn, dst_pte);
- dst_pte = pte_mksmall(dst_pte);
- }
-
- /* All looks good: create a cachable PTE and copy from it */
- copy_size = len;
- bytes_left_on_page =
- PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
- if (copy_size > bytes_left_on_page)
- copy_size = bytes_left_on_page;
- bytes_left_on_page =
- PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
- if (copy_size > bytes_left_on_page)
- copy_size = bytes_left_on_page;
- memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
-
- /* Release the pages */
- put_page(dst_page);
- put_page(src_page);
-
- /* Continue on the next page */
- dest += copy_size;
- source += copy_size;
- len -= copy_size;
- }
-
- return func(dest, source, len);
-}
-
-void *memcpy(void *to, const void *from, __kernel_size_t n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return (void *)__memcpy_asm(to, from, n);
- else
- return (void *)fast_copy(to, from, n, __memcpy_asm);
-}
-
-unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_to_user_inatomic_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
-}
-
-unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_from_user_inatomic_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
-}
-
-unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_from_user_zeroing_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
-}
-
-#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 4763b3aff1c..88c7016492c 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -14,7 +14,13 @@
* Do memcpy(), but trap and return "n" when a load or store faults.
*
* Note: this idiom only works when memcpy() compiles to a leaf function.
- * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
+ * Here leaf function not only means it does not have calls, but also
+ * requires no stack operations (sp, stack frame pointer) and no
+ * use of callee-saved registers, else "jrp lr" will be incorrect since
+ * unwinding stack frame is bypassed. Since memcpy() is not complex so
+ * these conditions are satisfied here, but we need to be careful when
+ * modifying this file. This is not a clean solution but is the best
+ * one so far.
*
* Also note that we are capturing "n" from the containing scope here.
*/
@@ -25,6 +31,7 @@
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
+ ".align 8;" \
".quad 1b, 2b;" \
".popsection" \
: "=m" (*(p)) : "r" (v), "r" (n)); \
@@ -37,6 +44,7 @@
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
+ ".align 8;" \
".quad 1b, 2b;" \
".popsection" \
: "=r" (__v) : "m" (*(p)), "r" (n)); \
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index 57dbb3a5bff..2042bfe6595 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -12,13 +12,10 @@
* more details.
*/
-#include <arch/chip.h>
-
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef memset
+#include <arch/chip.h>
void *memset(void *s, int c, size_t n)
{
@@ -26,11 +23,7 @@ void *memset(void *s, int c, size_t n)
int n32;
uint32_t v16, v32;
uint8_t *out8 = s;
-#if !CHIP_HAS_WH64()
- int ahead32;
-#else
int to_align32;
-#endif
/* Experimentation shows that a trivial tight loop is a win up until
* around a size of 20, where writing a word at a time starts to win.
@@ -61,21 +54,6 @@ void *memset(void *s, int c, size_t n)
return s;
}
-#if !CHIP_HAS_WH64()
- /* Use a spare issue slot to start prefetching the first cache
- * line early. This instruction is free as the store can be buried
- * in otherwise idle issue slots doing ALU ops.
- */
- __insn_prefetch(out8);
-
- /* We prefetch the end so that a short memset that spans two cache
- * lines gets some prefetching benefit. Again we believe this is free
- * to issue.
- */
- __insn_prefetch(&out8[n - 1]);
-#endif /* !CHIP_HAS_WH64() */
-
-
/* Align 'out8'. We know n >= 3 so this won't write past the end. */
while (((uintptr_t) out8 & 3) != 0) {
*out8++ = c;
@@ -96,90 +74,6 @@ void *memset(void *s, int c, size_t n)
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
-#if !CHIP_HAS_WH64()
-
- ahead32 = CACHE_LINE_SIZE_IN_WORDS;
-
- /* We already prefetched the first and last cache lines, so
- * we only need to do more prefetching if we are storing
- * to more than two cache lines.
- */
- if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
- int i;
-
- /* Prefetch the next several cache lines.
- * This is the setup code for the software-pipelined
- * loop below.
- */
-#define MAX_PREFETCH 5
- ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
- if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
- ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
-
- for (i = CACHE_LINE_SIZE_IN_WORDS;
- i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
- __insn_prefetch(&out32[i]);
- }
-
- if (n32 > ahead32) {
- while (1) {
- int j;
-
- /* Prefetch by reading one word several cache lines
- * ahead. Since loads are non-blocking this will
- * cause the full cache line to be read while we are
- * finishing earlier cache lines. Using a store
- * here causes microarchitectural performance
- * problems where a victimizing store miss goes to
- * the head of the retry FIFO and locks the pipe for
- * a few cycles. So a few subsequent stores in this
- * loop go into the retry FIFO, and then later
- * stores see other stores to the same cache line
- * are already in the retry FIFO and themselves go
- * into the retry FIFO, filling it up and grinding
- * to a halt waiting for the original miss to be
- * satisfied.
- */
- __insn_prefetch(&out32[ahead32]);
-
-#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
-#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
-#endif
-
- n32 -= CACHE_LINE_SIZE_IN_WORDS;
-
- /* Save icache space by only partially unrolling
- * this loop.
- */
- for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- }
-
- /* To save compiled code size, reuse this loop even
- * when we run out of prefetching to do by dropping
- * ahead32 down.
- */
- if (n32 <= ahead32) {
- /* Not even a full cache line left,
- * so stop now.
- */
- if (n32 < CACHE_LINE_SIZE_IN_WORDS)
- break;
-
- /* Choose a small enough value that we don't
- * prefetch past the end. There's no sense
- * in touching cache lines we don't have to.
- */
- ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
- }
- }
- }
-
-#else /* CHIP_HAS_WH64() */
-
/* Determine how many words we need to emit before the 'out32'
* pointer becomes aligned modulo the cache line size.
*/
@@ -236,8 +130,6 @@ void *memset(void *s, int c, size_t n)
n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
}
-#endif /* CHIP_HAS_WH64() */
-
/* Now handle any leftover values. */
if (n32 != 0) {
do {
diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c
index 3873085711d..03ef69cd73d 100644
--- a/arch/tile/lib/memset_64.c
+++ b/arch/tile/lib/memset_64.c
@@ -12,13 +12,11 @@
* more details.
*/
-#include <arch/chip.h>
-
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef memset
+#include <arch/chip.h>
+#include "string-endian.h"
void *memset(void *s, int c, size_t n)
{
@@ -70,8 +68,7 @@ void *memset(void *s, int c, size_t n)
n64 = n >> 3;
/* Tile input byte out to 64 bits. */
- /* KLUDGE */
- v64 = 0x0101010101010101ULL * (uint8_t)c;
+ v64 = copy_byte(c);
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b16ac49a968..b34f79aada4 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -101,7 +101,7 @@ EXPORT_SYMBOL(arch_spin_unlock_wait);
* preserve the semantic that the same read lock can be acquired in an
* interrupt context.
*/
-inline int arch_read_trylock(arch_rwlock_t *rwlock)
+int arch_read_trylock(arch_rwlock_t *rwlock)
{
u32 val;
__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
index c1010980913..6ac37509fac 100644
--- a/arch/tile/lib/spinlock_common.h
+++ b/arch/tile/lib/spinlock_common.h
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations)
loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
(loops - 1);
- relax(1 << exponent);
+ relax(loops);
}
diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c
index c94e6f7ae7b..841fe696301 100644
--- a/arch/tile/lib/strchr_32.c
+++ b/arch/tile/lib/strchr_32.c
@@ -16,8 +16,6 @@
#include <linux/string.h>
#include <linux/module.h>
-#undef strchr
-
char *strchr(const char *s, int c)
{
int z, g;
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
index 617a9273aaa..fe6e31c06f8 100644
--- a/arch/tile/lib/strchr_64.c
+++ b/arch/tile/lib/strchr_64.c
@@ -15,8 +15,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef strchr
+#include "string-endian.h"
char *strchr(const char *s, int c)
{
@@ -27,19 +26,15 @@ char *strchr(const char *s, int c)
const uint64_t *p = (const uint64_t *)(s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
* match neither zero nor goal (we make sure the high bit of each
* byte is 1, and the low 7 bits are all the opposite of the goal
* byte).
- *
- * Note that this shift count expression works because we know shift
- * counts are taken mod 64.
*/
- const uint64_t before_mask = (1ULL << (s_int << 3)) - 1;
- uint64_t v = (*p | before_mask) ^
- (goal & __insn_v1shrsi(before_mask, 1));
+ const uint64_t before_mask = MASK(s_int);
+ uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1));
uint64_t zero_matches, goal_matches;
while (1) {
@@ -55,8 +50,8 @@ char *strchr(const char *s, int c)
v = *++p;
}
- z = __insn_ctz(zero_matches);
- g = __insn_ctz(goal_matches);
+ z = CFZ(zero_matches);
+ g = CFZ(goal_matches);
/* If we found c before '\0' we got a match. Note that if c == '\0'
* then g == z, and we correctly return the address of the '\0'
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h
new file mode 100644
index 00000000000..2e49cbfe937
--- /dev/null
+++ b/arch/tile/lib/string-endian.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Provide a mask based on the pointer alignment that
+ * sets up non-zero bytes before the beginning of the string.
+ * The MASK expression works because shift counts are taken mod 64.
+ * Also, specify how to count "first" and "last" bits
+ * when the bits have been read as a word.
+ */
+
+#include <asm/byteorder.h>
+
+#ifdef __LITTLE_ENDIAN
+#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1)
+#define NULMASK(x) ((2ULL << x) - 1)
+#define CFZ(x) __insn_ctz(x)
+#define REVCZ(x) __insn_clz(x)
+#else
+#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1)))
+#define NULMASK(x) (-2LL << (63 - x))
+#define CFZ(x) __insn_clz(x)
+#define REVCZ(x) __insn_ctz(x)
+#endif
+
+/*
+ * Create eight copies of the byte in a uint64_t. Byte Shuffle uses
+ * the bytes of srcB as the index into the dest vector to select a
+ * byte. With all indices of zero, the first byte is copied into all
+ * the other bytes.
+ */
+static inline uint64_t copy_byte(uint8_t byte)
+{
+ return __insn_shufflebytes(byte, 0, 0);
+}
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
index 4974292a553..f26f88e11e4 100644
--- a/arch/tile/lib/strlen_32.c
+++ b/arch/tile/lib/strlen_32.c
@@ -16,8 +16,6 @@
#include <linux/string.h>
#include <linux/module.h>
-#undef strlen
-
size_t strlen(const char *s)
{
/* Get an aligned pointer. */
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
index 1c92d46202a..9583fc3361f 100644
--- a/arch/tile/lib/strlen_64.c
+++ b/arch/tile/lib/strlen_64.c
@@ -15,8 +15,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef strlen
+#include "string-endian.h"
size_t strlen(const char *s)
{
@@ -24,15 +23,13 @@ size_t strlen(const char *s)
const uintptr_t s_int = (uintptr_t) s;
const uint64_t *p = (const uint64_t *)(s_int & -8);
- /* Read the first word, but force bytes before the string to be nonzero.
- * This expression works because we know shift counts are taken mod 64.
- */
- uint64_t v = *p | ((1ULL << (s_int << 3)) - 1);
+ /* Read and MASK the first word. */
+ uint64_t v = *p | MASK(s_int);
uint64_t bits;
while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
v = *++p;
- return ((const char *)p) + (__insn_ctz(bits) >> 3) - s;
+ return ((const char *)p) + (CFZ(bits) >> 3) - s;
}
EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/strnlen_32.c b/arch/tile/lib/strnlen_32.c
new file mode 100644
index 00000000000..1434141d9e0
--- /dev/null
+++ b/arch/tile/lib/strnlen_32.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+size_t strnlen(const char *s, size_t count)
+{
+ /* Get an aligned pointer. */
+ const uintptr_t s_int = (uintptr_t) s;
+ const uint32_t *p = (const uint32_t *)(s_int & -4);
+ size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
+ size_t len;
+ uint32_t v, bits;
+
+ /* Avoid page fault risk by not reading any bytes when count is 0. */
+ if (count == 0)
+ return 0;
+
+ /* Read first word, but force bytes before the string to be nonzero. */
+ v = *p | ((1 << ((s_int << 3) & 31)) - 1);
+
+ while ((bits = __insn_seqb(v, 0)) == 0) {
+ if (bytes_read >= count) {
+ /* Read COUNT bytes and didn't find the terminator. */
+ return count;
+ }
+ v = *++p;
+ bytes_read += sizeof(v);
+ }
+
+ len = ((const char *) p) + (__insn_ctz(bits) >> 3) - s;
+ return (len < count ? len : count);
+}
+EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/strnlen_64.c b/arch/tile/lib/strnlen_64.c
new file mode 100644
index 00000000000..2e8de6a5136
--- /dev/null
+++ b/arch/tile/lib/strnlen_64.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include "string-endian.h"
+
+size_t strnlen(const char *s, size_t count)
+{
+ /* Get an aligned pointer. */
+ const uintptr_t s_int = (uintptr_t) s;
+ const uint64_t *p = (const uint64_t *)(s_int & -8);
+ size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
+ size_t len;
+ uint64_t v, bits;
+
+ /* Avoid page fault risk by not reading any bytes when count is 0. */
+ if (count == 0)
+ return 0;
+
+ /* Read and MASK the first word. */
+ v = *p | MASK(s_int);
+
+ while ((bits = __insn_v1cmpeqi(v, 0)) == 0) {
+ if (bytes_read >= count) {
+ /* Read COUNT bytes and didn't find the terminator. */
+ return count;
+ }
+ v = *++p;
+ bytes_read += sizeof(v);
+ }
+
+ len = ((const char *) p) + (CFZ(bits) >> 3) - s;
+ return (len < count ? len : count);
+}
+EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c
index f8d398c9ee7..030abe3ee4f 100644
--- a/arch/tile/lib/uaccess.c
+++ b/arch/tile/lib/uaccess.c
@@ -22,11 +22,3 @@ int __range_ok(unsigned long addr, unsigned long size)
is_arch_mappable_range(addr, size));
}
EXPORT_SYMBOL(__range_ok);
-
-#ifdef CONFIG_DEBUG_COPY_FROM_USER
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
-#endif
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index 979f76d8374..1bc16222463 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -19,82 +19,6 @@
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
- .pushsection .fixup,"ax"
-
-get_user_fault:
- { move r0, zero; move r1, zero }
- { movei r2, -EFAULT; jrp lr }
- ENDPROC(get_user_fault)
-
-put_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(put_user_fault)
-
- .popsection
-
-/*
- * __get_user_N functions take a pointer in r0, and return 0 in r2
- * on success, with the value in r0; or else -EFAULT in r2.
- */
-#define __get_user_N(bytes, LOAD) \
- STD_ENTRY(__get_user_##bytes); \
-1: { LOAD r0, r0; move r1, zero; move r2, zero }; \
- jrp lr; \
- STD_ENDPROC(__get_user_##bytes); \
- .pushsection __ex_table,"a"; \
- .word 1b, get_user_fault; \
- .popsection
-
-__get_user_N(1, lb_u)
-__get_user_N(2, lh_u)
-__get_user_N(4, lw)
-
-/*
- * __get_user_8 takes a pointer in r0, and returns 0 in r2
- * on success, with the value in r0/r1; or else -EFAULT in r2.
- */
- STD_ENTRY(__get_user_8);
-1: { lw r0, r0; addi r1, r0, 4 };
-2: { lw r1, r1; move r2, zero };
- jrp lr;
- STD_ENDPROC(__get_user_8);
- .pushsection __ex_table,"a";
- .word 1b, get_user_fault;
- .word 2b, get_user_fault;
- .popsection
-
-/*
- * __put_user_N functions take a value in r0 and a pointer in r1,
- * and return 0 in r0 on success or -EFAULT on failure.
- */
-#define __put_user_N(bytes, STORE) \
- STD_ENTRY(__put_user_##bytes); \
-1: { STORE r1, r0; move r0, zero }; \
- jrp lr; \
- STD_ENDPROC(__put_user_##bytes); \
- .pushsection __ex_table,"a"; \
- .word 1b, put_user_fault; \
- .popsection
-
-__put_user_N(1, sb)
-__put_user_N(2, sh)
-__put_user_N(4, sw)
-
-/*
- * __put_user_8 takes a value in r0/r1 and a pointer in r2,
- * and returns 0 in r0 on success or -EFAULT on failure.
- */
-STD_ENTRY(__put_user_8)
-1: { sw r2, r0; addi r2, r2, 4 }
-2: { sw r2, r1; move r0, zero }
- jrp lr
- STD_ENDPROC(__put_user_8)
- .pushsection __ex_table,"a"
- .word 1b, put_user_fault
- .word 2b, put_user_fault
- .popsection
-
-
/*
* strnlen_user_asm takes the pointer in r0, and the length bound in r1.
* It returns the length, including the terminating NUL, or zero on exception.
@@ -112,6 +36,7 @@ strnlen_user_fault:
{ move r0, zero; jrp lr }
ENDPROC(strnlen_user_fault)
.section __ex_table,"a"
+ .align 4
.word 1b, strnlen_user_fault
.popsection
@@ -123,18 +48,20 @@ strnlen_user_fault:
*/
STD_ENTRY(strncpy_from_user_asm)
{ bz r2, 2f; move r3, r0 }
-1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
{ sb r0, r4; addi r0, r0, 1 }
- bz r2, 2f
- bnzt r4, 1b
- addi r0, r0, -1 /* don't count the trailing NUL */
-2: { sub r0, r0, r3; jrp lr }
+ bz r4, 2f
+ bnzt r2, 1b
+ { sub r0, r0, r3; jrp lr }
+2: addi r0, r0, -1 /* don't count the trailing NUL */
+ { sub r0, r0, r3; jrp lr }
STD_ENDPROC(strncpy_from_user_asm)
.pushsection .fixup,"ax"
strncpy_from_user_fault:
{ movei r0, -EFAULT; jrp lr }
ENDPROC(strncpy_from_user_fault)
.section __ex_table,"a"
+ .align 4
.word 1b, strncpy_from_user_fault
.popsection
@@ -153,6 +80,7 @@ STD_ENTRY(clear_user_asm)
bnzt r1, 1b
2: { move r0, r1; jrp lr }
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -162,6 +90,7 @@ STD_ENTRY(clear_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(clear_user_asm)
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -181,25 +110,7 @@ STD_ENTRY(flush_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(flush_user_asm)
.pushsection __ex_table,"a"
- .word 1b, 2b
- .popsection
-
-/*
- * inv_user_asm takes the user target address in r0 and the
- * number of bytes to invalidate in r1.
- * It returns the number of not inv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(inv_user_asm)
- bz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
- { addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(inv_user_asm)
- .pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -219,5 +130,6 @@ STD_ENTRY(finv_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(finv_user_asm)
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index 2ff44f87b78..b3b31a3306f 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -19,55 +19,6 @@
/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
- .pushsection .fixup,"ax"
-
-get_user_fault:
- { movei r1, -EFAULT; move r0, zero }
- jrp lr
- ENDPROC(get_user_fault)
-
-put_user_fault:
- { movei r0, -EFAULT; jrp lr }
- ENDPROC(put_user_fault)
-
- .popsection
-
-/*
- * __get_user_N functions take a pointer in r0, and return 0 in r1
- * on success, with the value in r0; or else -EFAULT in r1.
- */
-#define __get_user_N(bytes, LOAD) \
- STD_ENTRY(__get_user_##bytes); \
-1: { LOAD r0, r0; move r1, zero }; \
- jrp lr; \
- STD_ENDPROC(__get_user_##bytes); \
- .pushsection __ex_table,"a"; \
- .quad 1b, get_user_fault; \
- .popsection
-
-__get_user_N(1, ld1u)
-__get_user_N(2, ld2u)
-__get_user_N(4, ld4u)
-__get_user_N(8, ld)
-
-/*
- * __put_user_N functions take a value in r0 and a pointer in r1,
- * and return 0 in r0 on success or -EFAULT on failure.
- */
-#define __put_user_N(bytes, STORE) \
- STD_ENTRY(__put_user_##bytes); \
-1: { STORE r1, r0; move r0, zero }; \
- jrp lr; \
- STD_ENDPROC(__put_user_##bytes); \
- .pushsection __ex_table,"a"; \
- .quad 1b, put_user_fault; \
- .popsection
-
-__put_user_N(1, st1)
-__put_user_N(2, st2)
-__put_user_N(4, st4)
-__put_user_N(8, st)
-
/*
* strnlen_user_asm takes the pointer in r0, and the length bound in r1.
* It returns the length, including the terminating NUL, or zero on exception.
@@ -85,6 +36,7 @@ strnlen_user_fault:
{ move r0, zero; jrp lr }
ENDPROC(strnlen_user_fault)
.section __ex_table,"a"
+ .align 8
.quad 1b, strnlen_user_fault
.popsection
@@ -96,18 +48,20 @@ strnlen_user_fault:
*/
STD_ENTRY(strncpy_from_user_asm)
{ beqz r2, 2f; move r3, r0 }
-1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
{ st1 r0, r4; addi r0, r0, 1 }
- beqz r2, 2f
- bnezt r4, 1b
- addi r0, r0, -1 /* don't count the trailing NUL */
-2: { sub r0, r0, r3; jrp lr }
+ beqz r4, 2f
+ bnezt r2, 1b
+ { sub r0, r0, r3; jrp lr }
+2: addi r0, r0, -1 /* don't count the trailing NUL */
+ { sub r0, r0, r3; jrp lr }
STD_ENDPROC(strncpy_from_user_asm)
.pushsection .fixup,"ax"
strncpy_from_user_fault:
{ movei r0, -EFAULT; jrp lr }
ENDPROC(strncpy_from_user_fault)
.section __ex_table,"a"
+ .align 8
.quad 1b, strncpy_from_user_fault
.popsection
@@ -126,6 +80,7 @@ STD_ENTRY(clear_user_asm)
bnezt r1, 1b
2: { move r0, r1; jrp lr }
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -135,6 +90,7 @@ STD_ENTRY(clear_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(clear_user_asm)
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -154,25 +110,7 @@ STD_ENTRY(flush_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(flush_user_asm)
.pushsection __ex_table,"a"
- .quad 1b, 2b
- .popsection
-
-/*
- * inv_user_asm takes the user target address in r0 and the
- * number of bytes to invalidate in r1.
- * It returns the number of not inv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(inv_user_asm)
- beqz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
- { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(inv_user_asm)
- .pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -192,5 +130,6 @@ STD_ENTRY(finv_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(finv_user_asm)
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 55e58e93bfc..23f044e8a7a 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -21,6 +21,8 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <arch/sim.h>
/* Notify a running simulator, if any, that an exec just occurred. */
static void sim_notify_exec(const char *binary_name)
@@ -35,28 +37,57 @@ static void sim_notify_exec(const char *binary_name)
} while (c);
}
-static int notify_exec(void)
+static int notify_exec(struct mm_struct *mm)
{
- int retval = 0; /* failure */
- struct vm_area_struct *vma = current->mm->mmap;
- while (vma) {
- if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
+ char *buf, *path;
+ struct vm_area_struct *vma;
+
+ if (!sim_is_simulator())
+ return 1;
+
+ if (mm->exe_file == NULL)
+ return 0;
+
+ for (vma = current->mm->mmap; ; vma = vma->vm_next) {
+ if (vma == NULL)
+ return 0;
+ if (vma->vm_file == mm->exe_file)
break;
- vma = vma->vm_next;
}
- if (vma) {
- char *buf = (char *) __get_free_page(GFP_KERNEL);
- if (buf) {
- char *path = d_path(&vma->vm_file->f_path,
- buf, PAGE_SIZE);
- if (!IS_ERR(path)) {
- sim_notify_exec(path);
- retval = 1;
- }
- free_page((unsigned long)buf);
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (buf == NULL)
+ return 0;
+
+ path = d_path(&mm->exe_file->f_path, buf, PAGE_SIZE);
+ if (IS_ERR(path)) {
+ free_page((unsigned long)buf);
+ return 0;
+ }
+
+ /*
+ * Notify simulator of an ET_DYN object so we know the load address.
+ * The somewhat cryptic overuse of SIM_CONTROL_DLOPEN allows us
+ * to be backward-compatible with older simulator releases.
+ */
+ if (vma->vm_start == (ELF_ET_DYN_BASE & PAGE_MASK)) {
+ char buf[64];
+ int i;
+
+ snprintf(buf, sizeof(buf), "0x%lx:@", vma->vm_start);
+ for (i = 0; ; ++i) {
+ char c = buf[i];
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_DLOPEN
+ | (c << _SIM_CONTROL_OPERATOR_BITS)));
+ if (c == '\0')
+ break;
}
}
- return retval;
+
+ sim_notify_exec(path);
+ free_page((unsigned long)buf);
+ return 1;
}
/* Notify a running simulator, if any, that we loaded an interpreter. */
@@ -72,63 +103,23 @@ static void sim_notify_interp(unsigned long load_addr)
}
-/* Kernel address of page used to map read-only kernel data into userspace. */
-static void *vdso_page;
-
-/* One-entry array used for install_special_mapping. */
-static struct page *vdso_pages[1];
-
-static int __init vdso_setup(void)
-{
- vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
- memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn);
- vdso_pages[0] = virt_to_page(vdso_page);
- return 0;
-}
-device_initcall(vdso_setup);
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_private_data == vdso_pages)
- return "[vdso]";
-#ifndef __tilegx__
- if (vma->vm_start == MEM_USER_INTRPT)
- return "[intrpt]";
-#endif
- return NULL;
-}
-
int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base;
int retval = 0;
+ down_write(&mm->mmap_sem);
+
/*
* Notify the simulator that an exec just occurred.
* If we can't find the filename of the mapping, just use
* whatever was passed as the linux_binprm filename.
*/
- if (!notify_exec())
+ if (!notify_exec(mm))
sim_notify_exec(bprm->filename);
- down_write(&mm->mmap_sem);
-
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- *
- * Make sure the vDSO gets into every core dump. Dumping its
- * contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to
- * see what PC values meant.
- */
- vdso_base = VDSO_BASE;
- retval = install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
- vdso_pages);
+ retval = setup_vdso_pages();
#ifndef __tilegx__
/*
@@ -140,7 +131,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
if (!retval) {
unsigned long addr = MEM_USER_INTRPT;
addr = mmap_region(NULL, addr, INTRPT_SIZE,
- MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
if (addr > (unsigned long) -PAGE_SIZE)
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index c1eaaa1fcc2..6c0571216a9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -34,8 +34,8 @@
#include <linux/hugetlb.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/kdebug.h>
-#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/traps.h>
@@ -71,9 +71,10 @@ static noinline void force_sig_info_fault(const char *type, int si_signo,
* Synthesize the fault a PL0 process would get by doing a word-load of
* an unaligned address or a high kernel address.
*/
-SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address,
- struct pt_regs *, regs)
+SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address)
{
+ struct pt_regs *regs = current_pt_regs();
+
if (address >= PAGE_OFFSET)
force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR,
address, INT_DTLB_MISS, current, regs);
@@ -122,16 +123,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
return NULL;
- if (!pmd_present(*pmd)) {
+ if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k);
- arch_flush_lazy_mmu_mode();
- } else
+ else
BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
return pmd_k;
}
/*
- * Handle a fault on the vmalloc or module mapping area
+ * Handle a fault on the vmalloc area.
*/
static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
{
@@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
pmd_k = vmalloc_sync_one(pgd, address);
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
- return 0; /* support TILE huge_vmap() API */
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
@@ -188,7 +186,7 @@ static pgd_t *get_current_pgd(void)
HV_Context ctx = hv_inquire_context();
unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
struct page *pgd_page = pfn_to_page(pgd_pfn);
- BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */
+ BUG_ON(PageHighMem(pgd_page));
return (pgd_t *) __va(ctx.page_table);
}
@@ -204,9 +202,14 @@ static pgd_t *get_current_pgd(void)
* interrupt or a critical region, and must do as little as possible.
* Similarly, we can't use atomic ops here, since we may be handling a
* fault caused by an atomic op access.
+ *
+ * If we find a migrating PTE while we're in an NMI context, and we're
+ * at a PC that has a registered exception handler, we don't wait,
+ * since this thread may (e.g.) have been interrupted while migrating
+ * its own stack, which would then cause us to self-deadlock.
*/
static int handle_migrating_pte(pgd_t *pgd, int fault_num,
- unsigned long address,
+ unsigned long address, unsigned long pc,
int is_kernel_mode, int write)
{
pud_t *pud;
@@ -228,6 +231,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num,
pte_offset_kernel(pmd, address);
pteval = *pte;
if (pte_migrating(pteval)) {
+ if (in_nmi() && search_exception_tables(pc))
+ return 0;
wait_for_migration(pte);
return 1;
}
@@ -267,12 +272,15 @@ static int handle_page_fault(struct pt_regs *regs,
int si_code;
int is_kernel_mode;
pgd_t *pgd;
+ unsigned int flags;
/* on TILE, protection faults are always writes */
if (!is_page_fault)
write = 1;
- is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+ flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+ is_kernel_mode = !user_mode(regs);
tsk = validate_current();
@@ -301,7 +309,7 @@ static int handle_page_fault(struct pt_regs *regs,
* rather than trying to patch up the existing PTE.
*/
pgd = get_current_pgd();
- if (handle_migrating_pte(pgd, fault_num, address,
+ if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
is_kernel_mode, write))
return 1;
@@ -336,9 +344,12 @@ static int handle_page_fault(struct pt_regs *regs,
/*
* If we're trying to touch user-space addresses, we must
* be either at PL0, or else with interrupts enabled in the
- * kernel, so either way we can re-enable interrupts here.
+ * kernel, so either way we can re-enable interrupts here
+ * unless we are doing atomic access to user space with
+ * interrupts disabled.
*/
- local_irq_enable();
+ if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+ local_irq_enable();
mm = tsk->mm;
@@ -351,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
+ if (!is_kernel_mode)
+ flags |= FAULT_FLAG_USER;
+
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -373,6 +387,8 @@ static int handle_page_fault(struct pt_regs *regs,
vma = NULL; /* happy compiler */
goto bad_area_nosemaphore;
}
+
+retry:
down_read(&mm->mmap_sem);
}
@@ -409,18 +425,22 @@ good_area:
#endif
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
} else {
if (!is_page_fault || !(vma->vm_flags & VM_READ))
goto bad_area;
}
- survive:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, write);
+ fault = handle_mm_fault(mm, vma, address, flags);
+
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ return 0;
+
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -428,33 +448,33 @@ good_area:
goto do_sigbus;
BUG();
}
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+ if (fault & VM_FAULT_RETRY) {
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to up_read(&mm->mmap_sem) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
+ }
+ }
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
- /*
- * If this was an asynchronous fault,
- * restart the appropriate engine.
- */
- switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
+ /* If this was a DMA TLB fault, restart the DMA engine. */
+ switch (fault_num) {
case INT_DMATLB_MISS:
case INT_DMATLB_MISS_DWNCL:
case INT_DMATLB_ACCESS:
case INT_DMATLB_ACCESS_DWNCL:
__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
break;
-#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
- __insn_mtspr(SPR_SNCTL,
- __insn_mfspr(SPR_SNCTL) &
- ~SPR_SNCTL__FRZPROC_MASK);
- break;
-#endif
}
#endif
@@ -535,15 +555,10 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- pr_alert("VM: killing process %s\n", tsk->comm);
- if (!is_kernel_mode)
- do_group_exit(SIGKILL);
- goto no_context;
+ if (is_kernel_mode)
+ goto no_context;
+ pagefault_out_of_memory();
+ return 0;
do_sigbus:
up_read(&mm->mmap_sem);
@@ -666,7 +681,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
*/
if (fault_num == INT_DTLB_ACCESS)
write = 1;
- if (handle_migrating_pte(pgd, fault_num, address, 1, write))
+ if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write))
return state;
/* Return zero so that we continue on with normal fault handling. */
@@ -689,8 +704,60 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
{
int is_page_fault;
+#ifdef CONFIG_KPROBES
+ /*
+ * This is to notify the fault handler of the kprobes. The
+ * exception code is redundant as it is also carried in REGS,
+ * but we pass it anyhow.
+ */
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
+ regs->faultnum, SIGSEGV) == NOTIFY_STOP)
+ return;
+#endif
+
+#ifdef __tilegx__
+ /*
+ * We don't need early do_page_fault_ics() support, since unlike
+ * Pro we don't need to worry about unlocking the atomic locks.
+ * There is only one current case in GX where we touch any memory
+ * under ICS other than our own kernel stack, and we handle that
+ * here. (If we crash due to trying to touch our own stack,
+ * we're in too much trouble for C code to help out anyway.)
+ */
+ if (write & ~1) {
+ unsigned long pc = write & ~1;
+ if (pc >= (unsigned long) __start_unalign_asm_code &&
+ pc < (unsigned long) __end_unalign_asm_code) {
+ struct thread_info *ti = current_thread_info();
+ /*
+ * Our EX_CONTEXT is still what it was from the
+ * initial unalign exception, but now we've faulted
+ * on the JIT page. We would like to complete the
+ * page fault however is appropriate, and then retry
+ * the instruction that caused the unalign exception.
+ * Our state has been "corrupted" by setting the low
+ * bit in "sp", and stashing r0..r3 in the
+ * thread_info area, so we revert all of that, then
+ * continue as if this were a normal page fault.
+ */
+ regs->sp &= ~1UL;
+ regs->regs[0] = ti->unalign_jit_tmp[0];
+ regs->regs[1] = ti->unalign_jit_tmp[1];
+ regs->regs[2] = ti->unalign_jit_tmp[2];
+ regs->regs[3] = ti->unalign_jit_tmp[3];
+ write &= 1;
+ } else {
+ pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
+ current->comm, current->pid, pc, address);
+ show_regs(regs);
+ do_group_exit(SIGKILL);
+ return;
+ }
+ }
+#else
/* This case should have been handled by do_page_fault_ics(). */
BUG_ON(write & ~1);
+#endif
#if CHIP_HAS_TILE_DMA()
/*
@@ -719,10 +786,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
case INT_DMATLB_MISS:
case INT_DMATLB_MISS_DWNCL:
#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
-#endif
is_page_fault = 1;
break;
@@ -738,8 +801,8 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
panic("Bad fault number %d in do_page_fault", fault_num);
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
- if (EX1_PL(regs->ex1) != USER_PL) {
+#if CHIP_HAS_TILE_DMA()
+ if (!user_mode(regs)) {
struct async_tlb *async;
switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
@@ -750,12 +813,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
async = &current->thread.dma_async_tlb;
break;
#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
- async = &current->thread.sn_async_tlb;
- break;
-#endif
default:
async = NULL;
}
@@ -788,14 +845,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
/*
- * Check an async_tlb structure to see if a deferred fault is waiting,
- * and if so pass it to the page-fault code.
+ * This routine effectively re-issues asynchronous page faults
+ * when we are returning to user space.
*/
-static void handle_async_page_fault(struct pt_regs *regs,
- struct async_tlb *async)
+void do_async_page_fault(struct pt_regs *regs)
{
+ struct async_tlb *async = &current->thread.dma_async_tlb;
+
+ /*
+ * Clear thread flag early. If we re-interrupt while processing
+ * code here, we will reset it and recall this routine before
+ * returning to user space.
+ */
+ clear_thread_flag(TIF_ASYNC_TLB);
+
if (async->fault_num) {
/*
* Clear async->fault_num before calling the page-fault
@@ -809,35 +874,15 @@ static void handle_async_page_fault(struct pt_regs *regs,
async->address, async->is_write);
}
}
-
-/*
- * This routine effectively re-issues asynchronous page faults
- * when we are returning to user space.
- */
-void do_async_page_fault(struct pt_regs *regs)
-{
- /*
- * Clear thread flag early. If we re-interrupt while processing
- * code here, we will reset it and recall this routine before
- * returning to user space.
- */
- clear_thread_flag(TIF_ASYNC_TLB);
-
-#if CHIP_HAS_TILE_DMA()
- handle_async_page_fault(regs, &current->thread.dma_async_tlb);
-#endif
-#if CHIP_HAS_SN_PROC()
- handle_async_page_fault(regs, &current->thread.sn_async_tlb);
-#endif
-}
-#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+#endif /* CHIP_HAS_TILE_DMA() */
void vmalloc_sync_all(void)
{
#ifdef __tilegx__
/* Currently all L1 kernel pmd's are static and shared. */
- BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
+ BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) !=
+ pgd_index(VMALLOC_START));
#else
/*
* Note that races in the updates of insync and start aren't
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index 31dbbd9afe4..0dc21829477 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -93,7 +93,7 @@ static DEFINE_PER_CPU(struct kmap_amps, amps);
* If we examine it earlier we are exposed to a race where it looks
* writable earlier, but becomes immutable before we write the PTE.
*/
-static void kmap_atomic_register(struct page *page, enum km_type type,
+static void kmap_atomic_register(struct page *page, int type,
unsigned long va, pte_t *ptep, pte_t pteval)
{
unsigned long flags;
@@ -114,7 +114,6 @@ static void kmap_atomic_register(struct page *page, enum km_type type,
list_add(&amp->list, &amp_list);
set_pte(ptep, pteval);
- arch_flush_lazy_mmu_mode();
spin_unlock(&amp_lock);
homecache_kpte_unlock(flags);
@@ -224,12 +223,12 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
}
EXPORT_SYMBOL(kmap_atomic_prot);
-void *__kmap_atomic(struct page *page)
+void *kmap_atomic(struct page *page)
{
/* PAGE_NONE is a magic value that tells us to check immutability. */
return kmap_atomic_prot(page, PAGE_NONE);
}
-EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(kmap_atomic);
void __kunmap_atomic(void *kvaddr)
{
@@ -259,7 +258,6 @@ void __kunmap_atomic(void *kvaddr)
BUG_ON(vaddr >= (unsigned long)high_memory);
}
- arch_flush_lazy_mmu_mode();
pagefault_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 1cc6ae477c9..33294fdc402 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@
#include <linux/cache.h>
#include <linux/smp.h>
#include <linux/module.h>
+#include <linux/hugetlb.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -42,12 +43,9 @@
#include "migrate.h"
-#if CHIP_HAS_COHERENT_LOCAL_CACHE()
-
/*
* The noallocl2 option suppresses all use of the L2 cache to cache
- * locally from a remote home. There's no point in using it if we
- * don't have coherent local caching, though.
+ * locally from a remote home.
*/
static int __write_once noallocl2;
static int __init set_noallocl2(char *str)
@@ -57,16 +55,6 @@ static int __init set_noallocl2(char *str)
}
early_param("noallocl2", set_noallocl2);
-#else
-
-#define noallocl2 0
-
-#endif
-
-/* Provide no-op versions of these routines to keep flush_remote() cleaner. */
-#define mark_caches_evicted_start() 0
-#define mark_caches_evicted_finish(mask, timestamp) do {} while (0)
-
/*
* Update the irq_stat for cpus that we are going to interrupt
@@ -106,7 +94,6 @@ static void hv_flush_update(const struct cpumask *cache_cpumask,
* there's never any good reason for hv_flush_remote() to fail.
* - Accepts a 32-bit PFN rather than a 64-bit PA, which generally
* is the type that Linux wants to pass around anyway.
- * - Centralizes the mark_caches_evicted() handling.
* - Canonicalizes that lengths of zero make cpumasks NULL.
* - Handles deferring TLB flushes for dataplane tiles.
* - Tracks remote interrupts in the per-cpu irq_cpustat_t.
@@ -125,7 +112,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
HV_Remote_ASID *asids, int asidcount)
{
int rc;
- int timestamp = 0; /* happy compiler */
struct cpumask cache_cpumask_copy, tlb_cpumask_copy;
struct cpumask *cache_cpumask, *tlb_cpumask;
HV_PhysAddr cache_pa;
@@ -156,15 +142,11 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length,
asids, asidcount);
cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT;
- if (cache_control & HV_FLUSH_EVICT_L2)
- timestamp = mark_caches_evicted_start();
rc = hv_flush_remote(cache_pa, cache_control,
cpumask_bits(cache_cpumask),
tlb_va, tlb_length, tlb_pgsize,
cpumask_bits(tlb_cpumask),
asids, asidcount);
- if (cache_control & HV_FLUSH_EVICT_L2)
- mark_caches_evicted_finish(cache_cpumask, timestamp);
if (rc == 0)
return;
cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy);
@@ -179,85 +161,88 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
panic("Unsafe to continue.");
}
-void flush_remote_page(struct page *page, int order)
+static void homecache_finv_page_va(void* va, int home)
{
- int i, pages = (1 << order);
- for (i = 0; i < pages; ++i, ++page) {
- void *p = kmap_atomic(page);
- int hfh = 0;
- int home = page_home(page);
-#if CHIP_HAS_CBOX_HOME_MAP()
- if (home == PAGE_HOME_HASH)
- hfh = 1;
- else
-#endif
- BUG_ON(home < 0 || home >= NR_CPUS);
- finv_buffer_remote(p, PAGE_SIZE, hfh);
- kunmap_atomic(p);
+ int cpu = get_cpu();
+ if (home == cpu) {
+ finv_buffer_local(va, PAGE_SIZE);
+ } else if (home == PAGE_HOME_HASH) {
+ finv_buffer_remote(va, PAGE_SIZE, 1);
+ } else {
+ BUG_ON(home < 0 || home >= NR_CPUS);
+ finv_buffer_remote(va, PAGE_SIZE, 0);
}
+ put_cpu();
}
-void homecache_evict(const struct cpumask *mask)
+void homecache_finv_map_page(struct page *page, int home)
{
- flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+ unsigned long flags;
+ unsigned long va;
+ pte_t *ptep;
+ pte_t pte;
+
+ if (home == PAGE_HOME_UNCACHED)
+ return;
+ local_irq_save(flags);
+#ifdef CONFIG_HIGHMEM
+ va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() +
+ (KM_TYPE_NR * smp_processor_id()));
+#else
+ va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id());
+#endif
+ ptep = virt_to_kpte(va);
+ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
+ __set_pte(ptep, pte_set_home(pte, home));
+ homecache_finv_page_va((void *)va, home);
+ __pte_clear(ptep);
+ hv_flush_page(va, PAGE_SIZE);
+#ifdef CONFIG_HIGHMEM
+ kmap_atomic_idx_pop();
+#endif
+ local_irq_restore(flags);
}
-/*
- * Return a mask of the cpus whose caches currently own these pages.
- * The return value is whether the pages are all coherently cached
- * (i.e. none are immutable, incoherent, or uncached).
- */
-static int homecache_mask(struct page *page, int pages,
- struct cpumask *home_mask)
+static void homecache_finv_page_home(struct page *page, int home)
{
- int i;
- int cached_coherently = 1;
- cpumask_clear(home_mask);
- for (i = 0; i < pages; ++i) {
- int home = page_home(&page[i]);
- if (home == PAGE_HOME_IMMUTABLE ||
- home == PAGE_HOME_INCOHERENT) {
- cpumask_copy(home_mask, cpu_possible_mask);
- return 0;
- }
-#if CHIP_HAS_CBOX_HOME_MAP()
- if (home == PAGE_HOME_HASH) {
- cpumask_or(home_mask, home_mask, &hash_for_home_map);
- continue;
- }
-#endif
- if (home == PAGE_HOME_UNCACHED) {
- cached_coherently = 0;
- continue;
- }
- BUG_ON(home < 0 || home >= NR_CPUS);
- cpumask_set_cpu(home, home_mask);
- }
- return cached_coherently;
+ if (!PageHighMem(page) && home == page_home(page))
+ homecache_finv_page_va(page_address(page), home);
+ else
+ homecache_finv_map_page(page, home);
}
-/*
- * Return the passed length, or zero if it's long enough that we
- * believe we should evict the whole L2 cache.
- */
-static unsigned long cache_flush_length(unsigned long length)
+static inline bool incoherent_home(int home)
{
- return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length;
+ return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT;
}
-/* Flush a page out of whatever cache(s) it is in. */
-void homecache_flush_cache(struct page *page, int order)
+static void homecache_finv_page_internal(struct page *page, int force_map)
{
- int pages = 1 << order;
- int length = cache_flush_length(pages * PAGE_SIZE);
- unsigned long pfn = page_to_pfn(page);
- struct cpumask home_mask;
-
- homecache_mask(page, pages, &home_mask);
- flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0);
- sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE);
+ int home = page_home(page);
+ if (home == PAGE_HOME_UNCACHED)
+ return;
+ if (incoherent_home(home)) {
+ int cpu;
+ for_each_cpu(cpu, &cpu_cacheable_map)
+ homecache_finv_map_page(page, cpu);
+ } else if (force_map) {
+ /* Force if, e.g., the normal mapping is migrating. */
+ homecache_finv_map_page(page, home);
+ } else {
+ homecache_finv_page_home(page, home);
+ }
+ sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE);
}
+void homecache_finv_page(struct page *page)
+{
+ homecache_finv_page_internal(page, 0);
+}
+
+void homecache_evict(const struct cpumask *mask)
+{
+ flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
+}
/* Report the home corresponding to a given PTE. */
static int pte_to_home(pte_t pte)
@@ -271,10 +256,8 @@ static int pte_to_home(pte_t pte)
return PAGE_HOME_INCOHERENT;
case HV_PTE_MODE_UNCACHED:
return PAGE_HOME_UNCACHED;
-#if CHIP_HAS_CBOX_HOME_MAP()
case HV_PTE_MODE_CACHE_HASH_L3:
return PAGE_HOME_HASH;
-#endif
}
panic("Bad PTE %#llx\n", pte.val);
}
@@ -331,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home)
HV_PTE_MODE_CACHE_NO_L3);
}
} else
-#if CHIP_HAS_CBOX_HOME_MAP()
if (hash_default)
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
else
-#endif
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
pte = hv_pte_set_nc(pte);
break;
-#if CHIP_HAS_CBOX_HOME_MAP()
case PAGE_HOME_HASH:
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
break;
-#endif
default:
BUG_ON(home < 0 || home >= NR_CPUS ||
@@ -354,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home)
break;
}
-#if CHIP_HAS_NC_AND_NOALLOC_BITS()
if (noallocl2)
pte = hv_pte_set_no_alloc_l2(pte);
@@ -363,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home)
hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
}
-#endif
/* Checking this case here gives a better panic than from the hv. */
BUG_ON(hv_pte_get_mode(pte) == 0);
@@ -379,21 +356,16 @@ EXPORT_SYMBOL(pte_set_home);
* so they're not suitable for anything but infrequent use.
*/
-#if CHIP_HAS_CBOX_HOME_MAP()
-static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
-#else
-static inline int initial_page_home(void) { return 0; }
-#endif
-
int page_home(struct page *page)
{
if (PageHighMem(page)) {
- return initial_page_home();
+ return PAGE_HOME_HASH;
} else {
unsigned long kva = (unsigned long)page_address(page);
- return pte_to_home(*virt_to_pte(NULL, kva));
+ return pte_to_home(*virt_to_kpte(kva));
}
}
+EXPORT_SYMBOL(page_home);
void homecache_change_page_home(struct page *page, int order, int home)
{
@@ -409,12 +381,13 @@ void homecache_change_page_home(struct page *page, int order, int home)
NULL, 0);
for (i = 0; i < pages; ++i, kva += PAGE_SIZE) {
- pte_t *ptep = virt_to_pte(NULL, kva);
+ pte_t *ptep = virt_to_kpte(kva);
pte_t pteval = *ptep;
BUG_ON(!pte_present(pteval) || pte_huge(pteval));
__set_pte(ptep, pte_set_home(pteval, home));
}
}
+EXPORT_SYMBOL(homecache_change_page_home);
struct page *homecache_alloc_pages(gfp_t gfp_mask,
unsigned int order, int home)
@@ -439,22 +412,25 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
return page;
}
-void homecache_free_pages(unsigned long addr, unsigned int order)
+void __homecache_free_pages(struct page *page, unsigned int order)
{
- struct page *page;
-
- if (addr == 0)
- return;
-
- VM_BUG_ON(!virt_addr_valid((void *)addr));
- page = virt_to_page((void *)addr);
if (put_page_testzero(page)) {
- homecache_change_page_home(page, order, initial_page_home());
+ homecache_change_page_home(page, order, PAGE_HOME_HASH);
if (order == 0) {
- free_hot_cold_page(page, 0);
+ free_hot_cold_page(page, false);
} else {
init_page_count(page);
__free_pages(page, order);
}
}
}
+EXPORT_SYMBOL(__homecache_free_pages);
+
+void homecache_free_pages(unsigned long addr, unsigned int order)
+{
+ if (addr != 0) {
+ VM_BUG_ON(!virt_addr_valid((void *)addr));
+ __homecache_free_pages(virt_to_page((void *)addr), order);
+ }
+}
+EXPORT_SYMBOL(homecache_free_pages);
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1e..e514899e110 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,129 @@
#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+
+/*
+ * Provide an additional huge page size (in addition to the regular default
+ * huge page size) if no "hugepagesz" arguments are specified.
+ * Note that it must be smaller than the default huge page size so
+ * that it's possible to allocate them on demand from the buddy allocator.
+ * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
+ * or not define it at all.
+ */
+#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
+
+/* "Extra" page-size multipliers, one per level of the page table. */
+int huge_shift[HUGE_SHIFT_ENTRIES] = {
+#ifdef ADDITIONAL_HUGE_SIZE
+#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
+ [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
+#endif
+};
+
+#endif
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
- pte_t *pte = NULL;
- /* We do not yet support multiple huge page sizes. */
- BUG_ON(sz != PMD_SIZE);
+ addr &= -sz; /* Mask off any low bits in the address. */
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- if (pud)
- pte = (pte_t *) pmd_alloc(mm, pud, addr);
- BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
- return pte;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+ if (sz >= PGDIR_SIZE) {
+ BUG_ON(sz != PGDIR_SIZE &&
+ sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
+ return (pte_t *)pud;
+ } else {
+ pmd_t *pmd = pmd_alloc(mm, pud, addr);
+ if (sz >= PMD_SIZE) {
+ BUG_ON(sz != PMD_SIZE &&
+ sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
+ return (pte_t *)pmd;
+ }
+ else {
+ if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
+ panic("Unexpected page size %#lx\n", sz);
+ return pte_alloc_map(mm, NULL, pmd, addr);
+ }
+ }
+#else
+ BUG_ON(sz != PMD_SIZE);
+ return (pte_t *) pmd_alloc(mm, pud, addr);
+#endif
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+static pte_t *get_pte(pte_t *base, int index, int level)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd = NULL;
-
- pgd = pgd_offset(mm, addr);
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, addr);
- if (pud_present(*pud))
- pmd = pmd_offset(pud, addr);
+ pte_t *ptep = base + index;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+ if (!pte_present(*ptep) && huge_shift[level] != 0) {
+ unsigned long mask = -1UL << huge_shift[level];
+ pte_t *super_ptep = base + (index & mask);
+ pte_t pte = *super_ptep;
+ if (pte_present(pte) && pte_super(pte))
+ ptep = super_ptep;
}
- return (pte_t *) pmd;
+#endif
+ return ptep;
}
-#ifdef HUGETLB_TEST
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
- int write)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
- unsigned long start = address;
- int length = 1;
- int nr;
- struct page *page;
- struct vm_area_struct *vma;
-
- vma = find_vma(mm, addr);
- if (!vma || !is_vm_hugetlb_page(vma))
- return ERR_PTR(-EINVAL);
-
- pte = huge_pte_offset(mm, address);
-
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || pte_none(*pte));
-
- page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
- WARN_ON(!PageHead(page));
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+ pte_t *pte;
+#endif
- return page;
-}
+ /* Get the top-level page table entry. */
+ pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
-int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
+ /* We don't have four levels. */
+ pud = pud_offset(pgd, addr);
+#ifndef __PAGETABLE_PUD_FOLDED
+# error support fourth page table level
+#endif
+ if (!pud_present(*pud))
+ return NULL;
+
+ /* Check for an L0 huge PTE, if we have three levels. */
+#ifndef __PAGETABLE_PMD_FOLDED
+ if (pud_huge(*pud))
+ return (pte_t *)pud;
+
+ pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
+ pmd_index(addr), 1);
+ if (!pmd_present(*pmd))
+ return NULL;
+#else
+ pmd = pmd_offset(pud, addr);
+#endif
-int pud_huge(pud_t pud)
-{
- return 0;
-}
+ /* Check for an L1 huge PTE. */
+ if (pmd_huge(*pmd))
+ return (pte_t *)pmd;
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+ /* Check for an L2 huge PTE. */
+ pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
+ if (!pte_present(*pte))
+ return NULL;
+ if (pte_super(*pte))
+ return pte;
+#endif
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
return NULL;
}
-#else
-
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write)
{
@@ -149,50 +193,21 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
return 0;
}
-#endif
-
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
-
- if (len > mm->cached_hole_size) {
- start_addr = mm->free_area_cache;
- } else {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
-full_search:
- addr = ALIGN(start_addr, huge_page_size(h));
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (TASK_SIZE - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = ALIGN(vma->vm_end, huge_page_size(h));
- }
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
}
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
@@ -200,92 +215,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *prev_vma;
- unsigned long base = mm->mmap_base, addr = addr0;
- unsigned long largest_hole = mm->cached_hole_size;
- int first_time = 1;
-
- /* don't allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
- if (len <= largest_hole) {
- largest_hole = 0;
- mm->free_area_cache = base;
- }
-try_again:
- /* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
-
- /* either no address requested or can't fit in requested address hole */
- addr = (mm->free_area_cache - len) & huge_page_mask(h);
- do {
- /*
- * Lookup failure means no vma is above this address,
- * i.e. return with success:
- */
- vma = find_vma_prev(mm, addr, &prev_vma);
- if (!vma) {
- return addr;
- break;
- }
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr + len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end))) {
- /* remember the address as a hint for next time */
- mm->cached_hole_size = largest_hole;
- mm->free_area_cache = addr;
- return addr;
- } else {
- /* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end) {
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
- }
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
- /* remember the largest hole we saw so far */
- if (addr + largest_hole < vma->vm_start)
- largest_hole = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = (vma->vm_start - len) & huge_page_mask(h);
-
- } while (len <= vma->vm_start);
-
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (first_time) {
- mm->free_area_cache = base;
- largest_hole = 0;
- first_time = 0;
- goto try_again;
- }
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
- addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
- len, pgoff, flags);
-
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = base;
- mm->cached_hole_size = ~0UL;
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
return addr;
}
@@ -322,21 +275,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
}
+#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
-static __init int setup_hugepagesz(char *opt)
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static __init int __setup_hugepagesz(unsigned long ps)
{
- unsigned long ps = memparse(opt, &opt);
- if (ps == PMD_SIZE) {
- hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (ps == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+ int log_ps = __builtin_ctzl(ps);
+ int level, base_shift;
+
+ if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
+ pr_warn("Not enabling %ld byte huge pages;"
+ " must be a power of four.\n", ps);
+ return -EINVAL;
+ }
+
+ if (ps > 64*1024*1024*1024UL) {
+ pr_warn("Not enabling %ld MB huge pages;"
+ " largest legal value is 64 GB .\n", ps >> 20);
+ return -EINVAL;
+ } else if (ps >= PUD_SIZE) {
+ static long hv_jpage_size;
+ if (hv_jpage_size == 0)
+ hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
+ if (hv_jpage_size != PUD_SIZE) {
+ pr_warn("Not enabling >= %ld MB huge pages:"
+ " hypervisor reports size %ld\n",
+ PUD_SIZE >> 20, hv_jpage_size);
+ return -EINVAL;
+ }
+ level = 0;
+ base_shift = PUD_SHIFT;
+ } else if (ps >= PMD_SIZE) {
+ level = 1;
+ base_shift = PMD_SHIFT;
+ } else if (ps > PAGE_SIZE) {
+ level = 2;
+ base_shift = PAGE_SHIFT;
} else {
- pr_err("hugepagesz: Unsupported page size %lu M\n",
- ps >> 20);
- return 0;
+ pr_err("hugepagesz: huge page size %ld too small\n", ps);
+ return -EINVAL;
+ }
+
+ if (log_ps != base_shift) {
+ int shift_val = log_ps - base_shift;
+ if (huge_shift[level] != 0) {
+ int old_shift = base_shift + huge_shift[level];
+ pr_warn("Not enabling %ld MB huge pages;"
+ " already have size %ld MB.\n",
+ ps >> 20, (1UL << old_shift) >> 20);
+ return -EINVAL;
+ }
+ if (hv_set_pte_super_shift(level, shift_val) != 0) {
+ pr_warn("Not enabling %ld MB huge pages;"
+ " no hypervisor support.\n", ps >> 20);
+ return -EINVAL;
+ }
+ printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
+ huge_shift[level] = shift_val;
+ }
+
+ hugetlb_add_hstate(log_ps - PAGE_SHIFT);
+
+ return 0;
+}
+
+static bool saw_hugepagesz;
+
+static __init int setup_hugepagesz(char *opt)
+{
+ if (!saw_hugepagesz) {
+ saw_hugepagesz = true;
+ memset(huge_shift, 0, sizeof(huge_shift));
}
- return 1;
+ return __setup_hugepagesz(memparse(opt, NULL));
}
__setup("hugepagesz=", setup_hugepagesz);
-#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#ifdef ADDITIONAL_HUGE_SIZE
+/*
+ * Provide an additional huge page size if no "hugepagesz" args are given.
+ * In that case, all the cores have properly set up their hv super_shift
+ * already, but we need to notify the hugetlb code to enable the
+ * new huge page size from the Linux point of view.
+ */
+static __init int add_default_hugepagesz(void)
+{
+ if (!saw_hugepagesz) {
+ BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
+ ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
+ BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
+ ADDITIONAL_HUGE_SIZE);
+ BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
+ hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
+ }
+ return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
+
+#endif /* CONFIG_HUGETLB_SUPER_PAGES */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 7309988c979..bfb3127b4df 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -38,7 +38,6 @@
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
@@ -83,7 +82,7 @@ static int num_l2_ptes[MAX_NUMNODES];
static void init_prealloc_ptes(int node, int pages)
{
- BUG_ON(pages & (HV_L2_ENTRIES-1));
+ BUG_ON(pages & (PTRS_PER_PTE - 1));
if (pages) {
num_l2_ptes[node] = pages;
l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t),
@@ -107,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn)
*/
static int initial_heap_home(void)
{
-#if CHIP_HAS_CBOX_HOME_MAP()
if (hash_default)
return PAGE_HOME_HASH;
-#endif
return smp_processor_id();
}
@@ -132,14 +129,9 @@ static void __init assign_pte(pmd_t *pmd, pte_t *page_table)
#ifdef __tilegx__
-#if HV_L1_SIZE != HV_L2_SIZE
-# error Rework assumption that L1 and L2 page tables are same size.
-#endif
-
-/* Since pmd_t arrays and pte_t arrays are the same size, just use casts. */
static inline pmd_t *alloc_pmd(void)
{
- return (pmd_t *)alloc_pte();
+ return __alloc_bootmem(L1_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0);
}
static inline void assign_pmd(pud_t *pud, pmd_t *pmd)
@@ -156,7 +148,21 @@ void __init shatter_pmd(pmd_t *pmd)
assign_pte(pmd, pte);
}
-#ifdef CONFIG_HIGHMEM
+#ifdef __tilegx__
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+ pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
+ if (pud_none(*pud))
+ assign_pmd(pud, alloc_pmd());
+ return pmd_offset(pud, va);
+}
+#else
+static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
+{
+ return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
+}
+#endif
+
/*
* This function initializes a certain range of kernel virtual memory
* with new bootmem page tables, everywhere page tables are missing in
@@ -169,34 +175,24 @@ void __init shatter_pmd(pmd_t *pmd)
* checking the pgd every time.
*/
static void __init page_table_range_init(unsigned long start,
- unsigned long end, pgd_t *pgd_base)
+ unsigned long end, pgd_t *pgd)
{
- pgd_t *pgd;
- int pgd_idx;
unsigned long vaddr;
-
- vaddr = start;
- pgd_idx = pgd_index(vaddr);
- pgd = pgd_base + pgd_idx;
-
- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr);
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) {
+ pmd_t *pmd = get_pmd(pgd, vaddr);
if (pmd_none(*pmd))
assign_pte(pmd, alloc_pte());
- vaddr += PMD_SIZE;
}
}
-#endif /* CONFIG_HIGHMEM */
-
-#if CHIP_HAS_CBOX_HOME_MAP()
static int __initdata ktext_hash = 1; /* .text pages */
static int __initdata kdata_hash = 1; /* .data and .bss pages */
int __write_once hash_default = 1; /* kernel allocator pages */
EXPORT_SYMBOL(hash_default);
int __write_once kstack_hash = 1; /* if no homecaching, use h4h */
-#endif /* CHIP_HAS_CBOX_HOME_MAP */
/*
* CPUs to use to for striping the pages of kernel data. If hash-for-home
@@ -214,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */
static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
{
prot = pte_set_home(prot, home);
-#if CHIP_HAS_CBOX_HOME_MAP()
if (home == PAGE_HOME_IMMUTABLE) {
if (ktext_hash)
prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
else
prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
}
-#endif
return prot;
}
@@ -233,40 +227,28 @@ static pgprot_t __init init_pgprot(ulong address)
{
int cpu;
unsigned long page;
- enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+ enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
-#if CHIP_HAS_CBOX_HOME_MAP()
/* For kdata=huge, everything is just hash-for-home. */
if (kdata_huge)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
-#endif
/* We map the aliased pages of permanent text inaccessible. */
if (address < (ulong) _sinittext - CODE_DELTA)
return PAGE_NONE;
- /*
- * We map read-only data non-coherent for performance. We could
- * use neighborhood caching on TILE64, but it's not clear it's a win.
- */
+ /* We map read-only data non-coherent for performance. */
if ((address >= (ulong) __start_rodata &&
address < (ulong) __end_rodata) ||
address == (ulong) empty_zero_page) {
return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE);
}
- /* As a performance optimization, keep the boot init stack here. */
- if (address >= (ulong)&init_thread_union &&
- address < (ulong)&init_thread_union + THREAD_SIZE)
- return construct_pgprot(PAGE_KERNEL, smp_processor_id());
-
#ifndef __tilegx__
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
/* Force the atomic_locks[] array page to be hash-for-home. */
if (address == (ulong) atomic_locks)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
#endif
-#endif
/*
* Everything else that isn't data or bss is heap, so mark it
@@ -284,28 +266,18 @@ static pgprot_t __init init_pgprot(ulong address)
if (address >= (ulong) _end || address < (ulong) _einitdata)
return construct_pgprot(PAGE_KERNEL, initial_heap_home());
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Use hash-for-home if requested for data/bss. */
if (kdata_hash)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
-#endif
-
- /*
- * Make the w1data homed like heap to start with, to avoid
- * making it part of the page-striped data area when we're just
- * going to convert it to read-only soon anyway.
- */
- if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end)
- return construct_pgprot(PAGE_KERNEL, initial_heap_home());
/*
* Otherwise we just hand out consecutive cpus. To avoid
* requiring this function to hold state, we just walk forward from
- * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach
- * the requested address, while walking cpu home around kdata_mask.
- * This is typically no more than a dozen or so iterations.
+ * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to
+ * reach the requested address, while walking cpu home around
+ * kdata_mask. This is typically no more than a dozen or so iterations.
*/
- page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK;
+ page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK;
BUG_ON(address < page || address >= (ulong)_end);
cpu = cpumask_first(&kdata_mask);
for (; page < address; page += PAGE_SIZE) {
@@ -315,11 +287,9 @@ static pgprot_t __init init_pgprot(ulong address)
if (page == (ulong)empty_zero_page)
continue;
#ifndef __tilegx__
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
if (page == (ulong)atomic_locks)
continue;
#endif
-#endif
cpu = cpumask_next(cpu, &kdata_mask);
if (cpu == NR_CPUS)
cpu = cpumask_first(&kdata_mask);
@@ -362,7 +332,7 @@ static int __init setup_ktext(char *str)
ktext_arg_seen = 1;
- /* Default setting on Tile64: use a huge page */
+ /* Default setting: use a huge page */
if (strcmp(str, "huge") == 0)
pr_info("ktext: using one huge locally cached page\n");
@@ -408,28 +378,11 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
{
if (!ktext_nocache)
prot = hv_pte_set_nc(prot);
-#if CHIP_HAS_NC_AND_NOALLOC_BITS()
else
prot = hv_pte_set_no_alloc_l2(prot);
-#endif
return prot;
}
-#ifndef __tilegx__
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
- return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va);
-}
-#else
-static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
-{
- pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va);
- if (pud_none(*pud))
- assign_pmd(pud, alloc_pmd());
- return pmd_offset(pud, va);
-}
-#endif
-
/* Temporary page table we use for staging. */
static pgd_t pgtables[PTRS_PER_PGD]
__attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
@@ -450,6 +403,7 @@ static pgd_t pgtables[PTRS_PER_PGD]
*/
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
+ unsigned long long irqmask;
unsigned long address, pfn;
pmd_t *pmd;
pte_t *pte;
@@ -458,7 +412,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
struct cpumask kstripe_mask;
int rc, i;
-#if CHIP_HAS_CBOX_HOME_MAP()
if (ktext_arg_seen && ktext_hash) {
pr_warning("warning: \"ktext\" boot argument ignored"
" if \"kcache_hash\" sets up text hash-for-home\n");
@@ -475,7 +428,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
" kcache_hash=all or =allbutstack\n");
kdata_huge = 0;
}
-#endif
/*
* Set up a mask for cpus to use for kernel striping.
@@ -556,8 +508,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
}
}
- address = MEM_SV_INTRPT;
+ address = MEM_SV_START;
pmd = get_pmd(pgtables, address);
+ pfn = 0; /* code starts at PA 0 */
if (ktext_small) {
/* Allocate an L2 PTE for the kernel text */
int cpu = 0;
@@ -579,11 +532,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
prot = ktext_set_nocache(prot);
}
- BUG_ON(address != (unsigned long)_stext);
- pfn = 0; /* code starts at PA 0 */
- pte = alloc_pte();
- for (pte_ofs = 0; address < (unsigned long)_einittext;
- pfn++, pte_ofs++, address += PAGE_SIZE) {
+ BUG_ON(address != (unsigned long)_text);
+ pte = NULL;
+ for (; address < (unsigned long)_einittext;
+ pfn++, address += PAGE_SIZE) {
+ pte_ofs = pte_index(address);
+ if (pte_ofs == 0) {
+ if (pte)
+ assign_pte(pmd++, pte);
+ pte = alloc_pte();
+ }
if (!ktext_local) {
prot = set_remote_cache_cpu(prot, cpu);
cpu = cpumask_next(cpu, &ktext_mask);
@@ -592,17 +550,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
}
pte[pte_ofs] = pfn_pte(pfn, prot);
}
- assign_pte(pmd, pte);
+ if (pte)
+ assign_pte(pmd, pte);
} else {
pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
pteval = pte_mkhuge(pteval);
-#if CHIP_HAS_CBOX_HOME_MAP()
if (ktext_hash) {
pteval = hv_pte_set_mode(pteval,
HV_PTE_MODE_CACHE_HASH_L3);
pteval = ktext_set_nocache(pteval);
} else
-#endif /* CHIP_HAS_CBOX_HOME_MAP() */
if (cpumask_weight(&ktext_mask) == 1) {
pteval = set_remote_cache_cpu(pteval,
cpumask_first(&ktext_mask));
@@ -615,7 +572,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
else
pteval = hv_pte_set_mode(pteval,
HV_PTE_MODE_CACHE_NO_L3);
- *(pte_t *)pmd = pteval;
+ for (; address < (unsigned long)_einittext;
+ pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
+ *(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
}
/* Set swapper_pgprot here so it is flushed to memory right away. */
@@ -630,10 +589,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
* - install pgtables[] as the real page table
* - flush the TLB so the new page table takes effect
*/
+ irqmask = interrupt_mask_save_mask();
+ interrupt_mask_set_mask(-1ULL);
rc = flush_and_install_context(__pa(pgtables),
init_pgprot((unsigned long)pgtables),
__get_cpu_var(current_asid),
cpumask_bits(my_cpu_mask));
+ interrupt_mask_restore_mask(irqmask);
BUG_ON(rc != 0);
/* Copy the page table back to the normal swapper_pg_dir. */
@@ -696,6 +658,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
#endif /* CONFIG_HIGHMEM */
+#ifndef CONFIG_64BIT
static void __init init_free_pfn_range(unsigned long start, unsigned long end)
{
unsigned long pfn;
@@ -725,7 +688,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end)
}
init_page_count(page);
__free_pages(page, order);
- totalram_pages += count;
+ adjust_managed_page_count(page, count);
page += count;
pfn += count;
@@ -738,16 +701,15 @@ static void __init set_non_bootmem_pages_init(void)
for_each_zone(z) {
unsigned long start, end;
int nid = z->zone_pgdat->node_id;
+#ifdef CONFIG_HIGHMEM
int idx = zone_idx(z);
+#endif
start = z->zone_start_pfn;
- if (start == 0)
- continue; /* bootmem */
end = start + z->spanned_pages;
- if (idx == ZONE_NORMAL) {
- BUG_ON(start != node_start_pfn[nid]);
- start = node_free_pfn[nid];
- }
+ start = max(start, node_free_pfn[nid]);
+ start = max(start, max_low_pfn);
+
#ifdef CONFIG_HIGHMEM
if (idx == ZONE_HIGHMEM)
totalhigh_pages += z->spanned_pages;
@@ -768,6 +730,7 @@ static void __init set_non_bootmem_pages_init(void)
init_free_pfn_range(start, end);
}
}
+#endif
/*
* paging_init() sets up the page tables - note that all of lowmem is
@@ -775,9 +738,6 @@ static void __init set_non_bootmem_pages_init(void)
*/
void __init paging_init(void)
{
-#ifdef CONFIG_HIGHMEM
- unsigned long vaddr, end;
-#endif
#ifdef __tilegx__
pud_t *pud;
#endif
@@ -785,14 +745,11 @@ void __init paging_init(void)
kernel_physical_mapping_init(pgd_base);
+ /* Fixed mappings, only the page table structure has to be created. */
+ page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1),
+ FIXADDR_TOP, pgd_base);
+
#ifdef CONFIG_HIGHMEM
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
- vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
- page_table_range_init(vaddr, end, pgd_base);
permanent_kmaps_init(pgd_base);
#endif
@@ -804,7 +761,7 @@ void __init paging_init(void)
* changing init_mm once we get up and running, and there's no
* need for e.g. vmalloc_sync_all().
*/
- BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END));
+ BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END - 1));
pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START);
assign_pmd(pud, alloc_pmd());
#endif
@@ -829,7 +786,6 @@ static void __init set_max_mapnr_init(void)
void __init mem_init(void)
{
- int codesize, datasize, initsize;
int i;
#ifndef __tilegx__
void *last;
@@ -854,24 +810,14 @@ void __init mem_init(void)
set_max_mapnr_init();
/* this will put all bootmem onto the freelists */
- totalram_pages += free_all_bootmem();
+ free_all_bootmem();
+#ifndef CONFIG_64BIT
/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
set_non_bootmem_pages_init();
+#endif
- codesize = (unsigned long)&_etext - (unsigned long)&_text;
- datasize = (unsigned long)&_end - (unsigned long)&_sdata;
- initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext;
- initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata;
-
- pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- num_physpages << (PAGE_SHIFT-10),
- codesize >> 10,
- datasize >> 10,
- initsize >> 10,
- (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
- );
+ mem_init_print_info(NULL);
/*
* In debug mode, dump some interesting memory mappings.
@@ -882,10 +828,6 @@ void __init mem_init(void)
printk(KERN_DEBUG " PKMAP %#lx - %#lx\n",
PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
#endif
-#ifdef CONFIG_HUGEVMAP
- printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n",
- HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
-#endif
printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n",
_VMALLOC_START, _VMALLOC_END - 1);
#ifdef __tilegx__
@@ -941,6 +883,14 @@ int remove_memory(u64 start, u64 size)
{
return -EINVAL;
}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+ /* TODO */
+ return -EBUSY;
+}
+#endif
#endif
struct kmem_cache *pgd_cache;
@@ -952,26 +902,6 @@ void __init pgtable_cache_init(void)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-/*
- * The __w1data area holds data that is only written during initialization,
- * and is read-only and thus freely cacheable thereafter. Fix the page
- * table entries that cover that region accordingly.
- */
-static void mark_w1data_ro(void)
-{
- /* Loop over page table entries */
- unsigned long addr = (unsigned long)__w1data_begin;
- BUG_ON((addr & (PAGE_SIZE-1)) != 0);
- for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
- unsigned long pfn = kaddr_to_pfn((void *)addr);
- pte_t *ptep = virt_to_pte(NULL, addr);
- BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */
- set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
- }
-}
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
static long __write_once initfree;
#else
@@ -982,7 +912,7 @@ static long __write_once initfree = 1;
static int __init set_initfree(char *str)
{
long val;
- if (strict_strtol(str, 0, &val) == 0) {
+ if (kstrtol(str, 0, &val) == 0) {
initfree = val;
pr_info("initfree: %s free init pages\n",
initfree ? "will" : "won't");
@@ -1011,7 +941,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
*/
int pfn = kaddr_to_pfn((void *)addr);
struct page *page = pfn_to_page(pfn);
- pte_t *ptep = virt_to_pte(NULL, addr);
+ pte_t *ptep = virt_to_kpte(addr);
if (!initfree) {
/*
* If debugging page accesses then do not free
@@ -1022,31 +952,24 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
pte_clear(&init_mm, addr, ptep);
continue;
}
- __ClearPageReserved(page);
- init_page_count(page);
if (pte_huge(*ptep))
BUG_ON(!kdata_huge);
else
set_pte_at(&init_mm, addr, ptep,
pfn_pte(pfn, PAGE_KERNEL));
memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
+ free_reserved_page(page);
}
pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
}
void free_initmem(void)
{
- const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET;
+ const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET;
/*
- * Evict the dirty initdata on the boot cpu, evict the w1data
- * wherever it's homed, and evict all the init code everywhere.
- * We are guaranteed that no one will touch the init pages any
- * more, and although other cpus may be touching the w1data,
- * we only actually change the caching on tile64, which won't
- * be keeping local copies in the other tiles' caches anyway.
+ * Evict the cache on all cores to avoid incoherence.
+ * We are guaranteed that no one will touch the init pages any more.
*/
homecache_evict(&cpu_cacheable_map);
@@ -1057,26 +980,11 @@ void free_initmem(void)
/*
* Free the pages mapped from 0xc0000000 that correspond to code
- * pages from MEM_SV_INTRPT that we won't use again after init.
+ * pages from MEM_SV_START that we won't use again after init.
*/
free_init_pages("unused kernel text",
(unsigned long)_sinittext - text_delta,
(unsigned long)_einittext - text_delta);
-
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
- /*
- * Upgrade the .w1data section to globally cached.
- * We don't do this on tilepro, since the cache architecture
- * pretty much makes it irrelevant, and in any case we end
- * up having racing issues with other tiles that may touch
- * the data after we flush the cache but before we update
- * the PTEs and flush the TLBs, causing sharer shootdowns
- * later. Even though this is to clean data, it seems like
- * an unnecessary complication.
- */
- mark_w1data_ro();
-#endif
-
/* Do a global TLB flush so everyone sees the changes. */
flush_tlb_all();
}
diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h
index cd45a0837fa..91683d97917 100644
--- a/arch/tile/mm/migrate.h
+++ b/arch/tile/mm/migrate.h
@@ -24,6 +24,9 @@
/*
* This function is used as a helper when setting up the initial
* page table (swapper_pg_dir).
+ *
+ * You must mask ALL interrupts prior to invoking this code, since
+ * you can't legally touch the stack during the cache flush.
*/
extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
HV_ASID asid,
@@ -39,6 +42,9 @@ extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
*
* Note that any non-NULL pointers must not point to the page that
* is handled by the stack_pte itself.
+ *
+ * You must mask ALL interrupts prior to invoking this code, since
+ * you can't legally touch the stack during the cache flush.
*/
extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va,
size_t length, pte_t *stack_ptep,
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index ac01a7cdf77..772085491bf 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -40,8 +40,7 @@
#define FRAME_R32 16
#define FRAME_R33 20
#define FRAME_R34 24
-#define FRAME_R35 28
-#define FRAME_SIZE 32
+#define FRAME_SIZE 28
@@ -66,12 +65,11 @@
#define r_my_cpumask r5
/* Locals (callee-save); must not be more than FRAME_xxx above. */
-#define r_save_ics r30
-#define r_context_lo r31
-#define r_context_hi r32
-#define r_access_lo r33
-#define r_access_hi r34
-#define r_asid r35
+#define r_context_lo r30
+#define r_context_hi r31
+#define r_access_lo r32
+#define r_access_hi r33
+#define r_asid r34
STD_ENTRY(flush_and_install_context)
/*
@@ -104,11 +102,7 @@ STD_ENTRY(flush_and_install_context)
sw r_tmp, r33
addi r_tmp, sp, FRAME_R34
}
- {
- sw r_tmp, r34
- addi r_tmp, sp, FRAME_R35
- }
- sw r_tmp, r35
+ sw r_tmp, r34
/* Move some arguments to callee-save registers. */
{
@@ -121,13 +115,6 @@ STD_ENTRY(flush_and_install_context)
}
move r_asid, r_asid_in
- /* Disable interrupts, since we can't use our stack. */
- {
- mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
- movei r_tmp, 1
- }
- mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
-
/* First, flush our L2 cache. */
{
move r0, zero /* cache_pa */
@@ -149,7 +136,7 @@ STD_ENTRY(flush_and_install_context)
move r8, zero /* asids */
move r9, zero /* asidcount */
}
- jal hv_flush_remote
+ jal _hv_flush_remote
bnz r0, .Ldone
/* Now install the new page table. */
@@ -163,9 +150,9 @@ STD_ENTRY(flush_and_install_context)
}
{
move r4, r_asid
- movei r5, HV_CTX_DIRECTIO
+ moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
}
- jal hv_install_context
+ jal _hv_install_context
bnz r0, .Ldone
/* Finally, flush the TLB. */
@@ -175,9 +162,6 @@ STD_ENTRY(flush_and_install_context)
}
.Ldone:
- /* Reset interrupts back how they were before. */
- mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
-
/* Restore the callee-saved registers and return. */
addli lr, sp, FRAME_SIZE
{
@@ -202,10 +186,6 @@ STD_ENTRY(flush_and_install_context)
}
{
lw r34, r_tmp
- addli r_tmp, sp, FRAME_R35
- }
- {
- lw r35, r_tmp
addi sp, sp, FRAME_SIZE
}
jrp lr
diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S
index e76fea688be..a49eee38f87 100644
--- a/arch/tile/mm/migrate_64.S
+++ b/arch/tile/mm/migrate_64.S
@@ -38,8 +38,7 @@
#define FRAME_R30 16
#define FRAME_R31 24
#define FRAME_R32 32
-#define FRAME_R33 40
-#define FRAME_SIZE 48
+#define FRAME_SIZE 40
@@ -60,10 +59,9 @@
#define r_my_cpumask r3
/* Locals (callee-save); must not be more than FRAME_xxx above. */
-#define r_save_ics r30
-#define r_context r31
-#define r_access r32
-#define r_asid r33
+#define r_context r30
+#define r_access r31
+#define r_asid r32
/*
* Caller-save locals and frame constants are the same as
@@ -93,11 +91,7 @@ STD_ENTRY(flush_and_install_context)
st r_tmp, r31
addi r_tmp, sp, FRAME_R32
}
- {
- st r_tmp, r32
- addi r_tmp, sp, FRAME_R33
- }
- st r_tmp, r33
+ st r_tmp, r32
/* Move some arguments to callee-save registers. */
{
@@ -106,13 +100,6 @@ STD_ENTRY(flush_and_install_context)
}
move r_asid, r_asid_in
- /* Disable interrupts, since we can't use our stack. */
- {
- mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
- movei r_tmp, 1
- }
- mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
-
/* First, flush our L2 cache. */
{
move r0, zero /* cache_pa */
@@ -136,7 +123,7 @@ STD_ENTRY(flush_and_install_context)
}
{
move r8, zero /* asidcount */
- jal hv_flush_remote
+ jal _hv_flush_remote
}
bnez r0, 1f
@@ -147,9 +134,9 @@ STD_ENTRY(flush_and_install_context)
}
{
move r2, r_asid
- movei r3, HV_CTX_DIRECTIO
+ moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
}
- jal hv_install_context
+ jal _hv_install_context
bnez r0, 1f
/* Finally, flush the TLB. */
@@ -158,10 +145,7 @@ STD_ENTRY(flush_and_install_context)
jal hv_flush_all
}
-1: /* Reset interrupts back how they were before. */
- mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
-
- /* Restore the callee-saved registers and return. */
+1: /* Restore the callee-saved registers and return. */
addli lr, sp, FRAME_SIZE
{
ld lr, lr
@@ -177,10 +161,6 @@ STD_ENTRY(flush_and_install_context)
}
{
ld r32, r_tmp
- addli r_tmp, sp, FRAME_R33
- }
- {
- ld r33, r_tmp
addi sp, sp, FRAME_SIZE
}
jrp lr
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index f96f4cec602..851a94e6ae5 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,18 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
#else
int is_32bit = 0;
#endif
+ unsigned long random_factor = 0UL;
+
+ /*
+ * 8 bits of randomness in 32bit mmaps, 24 address space bits
+ * 12 bits of randomness in 64bit mmaps, 28 address space bits
+ */
+ if (current->flags & PF_RANDOMIZE) {
+ if (is_32bit)
+ random_factor = get_random_int() % (1<<8);
+ else
+ random_factor = get_random_int() % (1<<12);
+
+ random_factor <<= PAGE_SHIFT;
+ }
/*
* Use standard layout if the expected stack growth is unlimited
* or we are running native 64 bits.
*/
- if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index de7d8e21e01..5e86eac4bfa 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -27,7 +27,6 @@
#include <linux/vmalloc.h>
#include <linux/smp.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
@@ -62,7 +61,7 @@ void show_mem(unsigned int filter)
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE),
global_page_state(NR_FILE_PAGES),
- nr_swap_pages);
+ get_nr_swap_pages());
for_each_zone(zone) {
unsigned long flags, order, total = 0, largest_order = -1;
@@ -84,64 +83,6 @@ void show_mem(unsigned int filter)
}
}
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
-
- /*
- * It's enough to flush this one mapping.
- * This appears conservative since it is only called
- * from __set_fixmap.
- */
- local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
-}
-
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
- set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-}
-
-#if defined(CONFIG_HIGHPTE)
-pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
-{
- pte_t *pte = kmap_atomic(pmd_page(*dir)) +
- (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
- return &pte[pte_index(address)];
-}
-#endif
-
/**
* shatter_huge_page() - ensure a given address is mapped by a small page.
*
@@ -178,23 +119,19 @@ void shatter_huge_page(unsigned long addr)
if (!pmd_huge_page(*pmd))
return;
- /*
- * Grab the pgd_lock, since we may need it to walk the pgd_list,
- * and since we need some kind of lock here to avoid races.
- */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock_irqsave(&init_mm.page_table_lock, flags);
if (!pmd_huge_page(*pmd)) {
/* Lost the race to convert the huge page. */
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
return;
}
/* Shatter the huge page into the preallocated L2 page table. */
- pmd_populate_kernel(&init_mm, pmd,
- get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+ pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd)));
#ifdef __PAGETABLE_PMD_FOLDED
/* Walk every pgd on the system and update the pmd there. */
+ spin_lock(&pgd_lock);
list_for_each(pos, &pgd_list) {
pmd_t *copy_pmd;
pgd = list_to_pgd(pos) + pgd_index(addr);
@@ -202,6 +139,7 @@ void shatter_huge_page(unsigned long addr)
copy_pmd = pmd_offset(pud, addr);
__set_pmd(copy_pmd, *pmd);
}
+ spin_unlock(&pgd_lock);
#endif
/* Tell every cpu to notice the change. */
@@ -209,7 +147,7 @@ void shatter_huge_page(unsigned long addr)
cpu_possible_mask, NULL, 0);
/* Hold the lock until the TLB flush is finished to avoid races. */
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
}
/*
@@ -218,9 +156,13 @@ void shatter_huge_page(unsigned long addr)
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
- * The locking scheme was chosen on the basis of manfred's
- * recommendations and having no core impact whatsoever.
- * -- wli
+ *
+ * The lock is always taken with interrupts disabled, unlike on x86
+ * and other platforms, because we need to take the lock in
+ * shatter_huge_page(), which may be called from an interrupt context.
+ * We are not at risk from the tlbflush IPI deadlock that was seen on
+ * x86, since we use the flush_remote() API to have the hypervisor do
+ * the TLB flushes regardless of irq disabling.
*/
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
@@ -288,35 +230,32 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
+ int order)
{
gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
struct page *p;
-#if L2_USER_PGTABLE_ORDER > 0
int i;
-#endif
-
-#ifdef CONFIG_HIGHPTE
- flags |= __GFP_HIGHMEM;
-#endif
p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
if (p == NULL)
return NULL;
-#if L2_USER_PGTABLE_ORDER > 0
+ if (!pgtable_page_ctor(p)) {
+ __free_pages(p, L2_USER_PGTABLE_ORDER);
+ return NULL;
+ }
+
/*
* Make every page have a page_count() of one, not just the first.
* We don't use __GFP_COMP since it doesn't look like it works
* correctly with tlb_remove_page().
*/
- for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+ for (i = 1; i < order; ++i) {
init_page_count(p+i);
inc_zone_page_state(p+i, NR_PAGETABLE);
}
-#endif
- pgtable_page_ctor(p);
return p;
}
@@ -325,28 +264,28 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
* process). We have to correct whatever pte_alloc_one() did before
* returning the pages to the allocator.
*/
-void pte_free(struct mm_struct *mm, struct page *p)
+void pgtable_free(struct mm_struct *mm, struct page *p, int order)
{
int i;
pgtable_page_dtor(p);
__free_page(p);
- for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+ for (i = 1; i < order; ++i) {
__free_page(p+i);
dec_zone_page_state(p+i, NR_PAGETABLE);
}
}
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
- unsigned long address)
+void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address, int order)
{
int i;
pgtable_page_dtor(pte);
tlb_remove_page(tlb, pte);
- for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
+ for (i = 1; i < order; ++i) {
tlb_remove_page(tlb, pte + i);
dec_zone_page_state(pte + i, NR_PAGETABLE);
}
@@ -389,6 +328,17 @@ void ptep_set_wrprotect(struct mm_struct *mm,
#endif
+/*
+ * Return a pointer to the PTE that corresponds to the given
+ * address in the given page table. A NULL page table just uses
+ * the standard kernel page table; the preferred API in this case
+ * is virt_to_kpte().
+ *
+ * The returned pointer can point to a huge page in other levels
+ * of the page table than the bottom, if the huge page is present
+ * in the page table. For bottom-level PTEs, the returned pointer
+ * can point to a PTE that is either present or not.
+ */
pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr)
{
pgd_t *pgd;
@@ -402,13 +352,23 @@ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr)
pud = pud_offset(pgd, addr);
if (!pud_present(*pud))
return NULL;
+ if (pud_huge_page(*pud))
+ return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
- if (pmd_huge_page(*pmd))
- return (pte_t *)pmd;
if (!pmd_present(*pmd))
return NULL;
+ if (pmd_huge_page(*pmd))
+ return (pte_t *)pmd;
return pte_offset_kernel(pmd, addr);
}
+EXPORT_SYMBOL(virt_to_pte);
+
+pte_t *virt_to_kpte(unsigned long kaddr)
+{
+ BUG_ON(kaddr < PAGE_OFFSET);
+ return virt_to_pte(NULL, kaddr);
+}
+EXPORT_SYMBOL(virt_to_kpte);
pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
{
@@ -470,10 +430,18 @@ void __set_pte(pte_t *ptep, pte_t pte)
void set_pte(pte_t *ptep, pte_t pte)
{
- struct page *page = pfn_to_page(pte_pfn(pte));
-
- /* Update the home of a PTE if necessary */
- pte = pte_set_home(pte, page_home(page));
+ if (pte_present(pte) &&
+ (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
+ /* The PTE actually references physical memory. */
+ unsigned long pfn = pte_pfn(pte);
+ if (pfn_valid(pfn)) {
+ /* Update the home of the PTE from the struct page. */
+ pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
+ } else if (hv_pte_get_mode(pte) == 0) {
+ /* remap_pfn_range(), etc, must supply PTE mode. */
+ panic("set_pte(): out-of-range PFN and mode 0\n");
+ }
+ }
__set_pte(ptep, pte);
}
@@ -481,7 +449,7 @@ void set_pte(pte_t *ptep, pte_t pte)
/* Can this mm load a PTE with cached_priority set? */
static inline int mm_is_priority_cached(struct mm_struct *mm)
{
- return mm->context.priority_cached;
+ return mm->context.priority_cached != 0;
}
/*
@@ -491,8 +459,8 @@ static inline int mm_is_priority_cached(struct mm_struct *mm)
void start_mm_caching(struct mm_struct *mm)
{
if (!mm_is_priority_cached(mm)) {
- mm->context.priority_cached = -1U;
- hv_set_caching(-1U);
+ mm->context.priority_cached = -1UL;
+ hv_set_caching(-1UL);
}
}
@@ -507,7 +475,7 @@ void start_mm_caching(struct mm_struct *mm)
* Presumably we'll come back later and have more luck and clear
* the value then; for now we'll just keep the cache marked for priority.
*/
-static unsigned int update_priority_cached(struct mm_struct *mm)
+static unsigned long update_priority_cached(struct mm_struct *mm)
{
if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
struct vm_area_struct *vm;
@@ -575,20 +543,13 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
addr = area->addr;
if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
phys_addr, pgprot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ free_vm_area(area);
return NULL;
}
return (__force void __iomem *) (offset + (char *)addr);
}
EXPORT_SYMBOL(ioremap_prot);
-/* Map a PCI MMIO bus address into VA space. */
-void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
-{
- panic("ioremap for PCI MMIO is not supported");
-}
-EXPORT_SYMBOL(ioremap);
-
/* Unmap an MMIO VA mapping. */
void iounmap(volatile void __iomem *addr_in)
{
@@ -606,12 +567,7 @@ void iounmap(volatile void __iomem *addr_in)
in parallel. Reuse of the virtual address is prevented by
leaving it in the global lists until we're done with it.
cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
+ p = find_vm_area((void *)addr);
if (!p) {
pr_err("iounmap: bad address %p\n", addr);