diff options
Diffstat (limited to 'arch/tile/include')
146 files changed, 10564 insertions, 2536 deletions
diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild index 9c0ea24cc94..3751c9fabcf 100644 --- a/arch/tile/include/arch/Kbuild +++ b/arch/tile/include/arch/Kbuild @@ -1,17 +1 @@ -header-y += abi.h -header-y += chip.h -header-y += chip_tile64.h -header-y += chip_tilegx.h -header-y += chip_tilepro.h -header-y += icache.h -header-y += interrupts.h -header-y += interrupts_32.h -header-y += interrupts_64.h -header-y += opcode.h -header-y += opcode_tilegx.h -header-y += opcode_tilepro.h -header-y += sim.h -header-y += sim_def.h -header-y += spr_def.h -header-y += spr_def_32.h -header-y += spr_def_64.h +# Tile arch headers diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h deleted file mode 100644 index 261aaba092d..00000000000 --- a/arch/tile/include/arch/chip_tile64.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -/* - * @file - * Global header file. - * This header file specifies defines for TILE64. - */ - -#ifndef __ARCH_CHIP_H__ -#define __ARCH_CHIP_H__ - -/** Specify chip version. - * When possible, prefer the CHIP_xxx symbols below for future-proofing. - * This is intended for cross-compiling; native compilation should - * use the predefined __tile_chip__ symbol. - */ -#define TILE_CHIP 0 - -/** Specify chip revision. - * This provides for the case of a respin of a particular chip type; - * the normal value for this symbol is "0". - * This is intended for cross-compiling; native compilation should - * use the predefined __tile_chip_rev__ symbol. - */ -#define TILE_CHIP_REV 0 - -/** The name of this architecture. */ -#define CHIP_ARCH_NAME "tile64" - -/** The ELF e_machine type for binaries for this chip. */ -#define CHIP_ELF_TYPE() EM_TILE64 - -/** The alternate ELF e_machine type for binaries for this chip. */ -#define CHIP_COMPAT_ELF_TYPE() 0x2506 - -/** What is the native word size of the machine? */ -#define CHIP_WORD_SIZE() 32 - -/** How many bits of a virtual address are used. Extra bits must be - * the sign extension of the low bits. - */ -#define CHIP_VA_WIDTH() 32 - -/** How many bits are in a physical address? */ -#define CHIP_PA_WIDTH() 36 - -/** Size of the L2 cache, in bytes. */ -#define CHIP_L2_CACHE_SIZE() 65536 - -/** Log size of an L2 cache line in bytes. */ -#define CHIP_L2_LOG_LINE_SIZE() 6 - -/** Size of an L2 cache line, in bytes. */ -#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) - -/** Associativity of the L2 cache. */ -#define CHIP_L2_ASSOC() 2 - -/** Size of the L1 data cache, in bytes. */ -#define CHIP_L1D_CACHE_SIZE() 8192 - -/** Log size of an L1 data cache line in bytes. */ -#define CHIP_L1D_LOG_LINE_SIZE() 4 - -/** Size of an L1 data cache line, in bytes. */ -#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) - -/** Associativity of the L1 data cache. */ -#define CHIP_L1D_ASSOC() 2 - -/** Size of the L1 instruction cache, in bytes. */ -#define CHIP_L1I_CACHE_SIZE() 8192 - -/** Log size of an L1 instruction cache line in bytes. */ -#define CHIP_L1I_LOG_LINE_SIZE() 6 - -/** Size of an L1 instruction cache line, in bytes. */ -#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) - -/** Associativity of the L1 instruction cache. */ -#define CHIP_L1I_ASSOC() 1 - -/** Stride with which flush instructions must be issued. */ -#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() - -/** Stride with which inv instructions must be issued. */ -#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE() - -/** Stride with which finv instructions must be issued. */ -#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE() - -/** Can the local cache coherently cache data that is homed elsewhere? */ -#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0 - -/** How many simultaneous outstanding victims can the L2 cache have? */ -#define CHIP_MAX_OUTSTANDING_VICTIMS() 2 - -/** Does the TLB support the NC and NOALLOC bits? */ -#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0 - -/** Does the chip support hash-for-home caching? */ -#define CHIP_HAS_CBOX_HOME_MAP() 0 - -/** Number of entries in the chip's home map tables. */ -/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */ - -/** Do uncacheable requests miss in the cache regardless of whether - * there is matching data? */ -#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0 - -/** Does the mf instruction wait for victims? */ -#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1 - -/** Does the chip have an "inv" instruction that doesn't also flush? */ -#define CHIP_HAS_INV() 0 - -/** Does the chip have a "wh64" instruction? */ -#define CHIP_HAS_WH64() 0 - -/** Does this chip have a 'dword_align' instruction? */ -#define CHIP_HAS_DWORD_ALIGN() 0 - -/** Number of performance counters. */ -#define CHIP_PERFORMANCE_COUNTERS() 2 - -/** Does this chip have auxiliary performance counters? */ -#define CHIP_HAS_AUX_PERF_COUNTERS() 0 - -/** Is the CBOX_MSR1 SPR supported? */ -#define CHIP_HAS_CBOX_MSR1() 0 - -/** Is the TILE_RTF_HWM SPR supported? */ -#define CHIP_HAS_TILE_RTF_HWM() 0 - -/** Is the TILE_WRITE_PENDING SPR supported? */ -#define CHIP_HAS_TILE_WRITE_PENDING() 0 - -/** Is the PROC_STATUS SPR supported? */ -#define CHIP_HAS_PROC_STATUS_SPR() 0 - -/** Is the DSTREAM_PF SPR supported? */ -#define CHIP_HAS_DSTREAM_PF() 0 - -/** Log of the number of mshims we have. */ -#define CHIP_LOG_NUM_MSHIMS() 2 - -/** Are the bases of the interrupt vector areas fixed? */ -#define CHIP_HAS_FIXED_INTVEC_BASE() 1 - -/** Are the interrupt masks split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_INTR_MASK() 1 - -/** Is the cycle count split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_CYCLE() 1 - -/** Does the chip have a static network? */ -#define CHIP_HAS_SN() 1 - -/** Does the chip have a static network processor? */ -#define CHIP_HAS_SN_PROC() 1 - -/** Size of the L1 static network processor instruction cache, in bytes. */ -#define CHIP_L1SNI_CACHE_SIZE() 2048 - -/** Does the chip have DMA support in each tile? */ -#define CHIP_HAS_TILE_DMA() 1 - -/** Does the chip have the second revision of the directly accessible - * dynamic networks? This encapsulates a number of characteristics, - * including the absence of the catch-all, the absence of inline message - * tags, the absence of support for network context-switching, and so on. - */ -#define CHIP_HAS_REV1_XDN() 0 - -/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ -#define CHIP_HAS_CMPEXCH() 0 - -/** Does the chip have memory-mapped I/O support? */ -#define CHIP_HAS_MMIO() 0 - -/** Does the chip have post-completion interrupts? */ -#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0 - -/** Does the chip have native single step support? */ -#define CHIP_HAS_SINGLE_STEP() 0 - -#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ - -/** How many entries are present in the instruction TLB? */ -#define CHIP_ITLB_ENTRIES() 8 - -/** How many entries are present in the data TLB? */ -#define CHIP_DTLB_ENTRIES() 16 - -/** How many MAF entries does the XAUI shim have? */ -#define CHIP_XAUI_MAF_ENTRIES() 16 - -/** Does the memory shim have a source-id table? */ -#define CHIP_HAS_MSHIM_SRCID_TABLE() 1 - -/** Does the L1 instruction cache clear on reset? */ -#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0 - -/** Does the chip come out of reset with valid coordinates on all tiles? - * Note that if defined, this also implies that the upper left is 1,1. - */ -#define CHIP_HAS_VALID_TILE_COORD_RESET() 0 - -/** Does the chip have unified packet formats? */ -#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0 - -/** Does the chip support write reordering? */ -#define CHIP_HAS_WRITE_REORDERING() 0 - -/** Does the chip support Y-X routing as well as X-Y? */ -#define CHIP_HAS_Y_X_ROUTING() 0 - -/** Is INTCTRL_3 managed with the correct MPL? */ -#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0 - -/** Is it possible to configure the chip to be big-endian? */ -#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0 - -/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ -#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0 - -/** Is the DIAG_TRACE_WAY SPR supported? */ -#define CHIP_HAS_DIAG_TRACE_WAY() 0 - -/** Is the MEM_STRIPE_CONFIG SPR supported? */ -#define CHIP_HAS_MEM_STRIPE_CONFIG() 0 - -/** Are the TLB_PERF SPRs supported? */ -#define CHIP_HAS_TLB_PERF() 0 - -/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ -#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0 - -/** Does the chip support rev1 DMA packets? */ -#define CHIP_HAS_REV1_DMA_PACKETS() 0 - -/** Does the chip have an IPI shim? */ -#define CHIP_HAS_IPI() 0 - -#endif /* !__OPEN_SOURCE__ */ -#endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h deleted file mode 100644 index 96b5710505b..00000000000 --- a/arch/tile/include/arch/interrupts_32.h +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef __ARCH_INTERRUPTS_H__ -#define __ARCH_INTERRUPTS_H__ - -/** Mask for an interrupt. */ -/* Note: must handle breaking interrupts into high and low words manually. */ -#define INT_MASK_LO(intno) (1 << (intno)) -#define INT_MASK_HI(intno) (1 << ((intno) - 32)) - -#ifndef __ASSEMBLER__ -#define INT_MASK(intno) (1ULL << (intno)) -#endif - - -/** Where a given interrupt executes */ -#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) - -/** Where to store a vector for a given interrupt. */ -#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) - -/** The base address of user-level interrupts. */ -#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) - - -/** Additional synthetic interrupt. */ -#define INT_BREAKPOINT (63) - -#define INT_ITLB_MISS 0 -#define INT_MEM_ERROR 1 -#define INT_ILL 2 -#define INT_GPV 3 -#define INT_SN_ACCESS 4 -#define INT_IDN_ACCESS 5 -#define INT_UDN_ACCESS 6 -#define INT_IDN_REFILL 7 -#define INT_UDN_REFILL 8 -#define INT_IDN_COMPLETE 9 -#define INT_UDN_COMPLETE 10 -#define INT_SWINT_3 11 -#define INT_SWINT_2 12 -#define INT_SWINT_1 13 -#define INT_SWINT_0 14 -#define INT_UNALIGN_DATA 15 -#define INT_DTLB_MISS 16 -#define INT_DTLB_ACCESS 17 -#define INT_DMATLB_MISS 18 -#define INT_DMATLB_ACCESS 19 -#define INT_SNITLB_MISS 20 -#define INT_SN_NOTIFY 21 -#define INT_SN_FIREWALL 22 -#define INT_IDN_FIREWALL 23 -#define INT_UDN_FIREWALL 24 -#define INT_TILE_TIMER 25 -#define INT_IDN_TIMER 26 -#define INT_UDN_TIMER 27 -#define INT_DMA_NOTIFY 28 -#define INT_IDN_CA 29 -#define INT_UDN_CA 30 -#define INT_IDN_AVAIL 31 -#define INT_UDN_AVAIL 32 -#define INT_PERF_COUNT 33 -#define INT_INTCTRL_3 34 -#define INT_INTCTRL_2 35 -#define INT_INTCTRL_1 36 -#define INT_INTCTRL_0 37 -#define INT_BOOT_ACCESS 38 -#define INT_WORLD_ACCESS 39 -#define INT_I_ASID 40 -#define INT_D_ASID 41 -#define INT_DMA_ASID 42 -#define INT_SNI_ASID 43 -#define INT_DMA_CPL 44 -#define INT_SN_CPL 45 -#define INT_DOUBLE_FAULT 46 -#define INT_SN_STATIC_ACCESS 47 -#define INT_AUX_PERF_COUNT 48 - -#define NUM_INTERRUPTS 49 - -#ifndef __ASSEMBLER__ -#define QUEUED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define NONQUEUED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define CRITICAL_MASKED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define CRITICAL_UNMASKED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define MASKABLE_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define UNMASKABLE_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define SYNC_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define NON_SYNC_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#endif /* !__ASSEMBLER__ */ -#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/interrupts_64.h b/arch/tile/include/arch/interrupts_64.h deleted file mode 100644 index 5bb58b2e4e6..00000000000 --- a/arch/tile/include/arch/interrupts_64.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef __ARCH_INTERRUPTS_H__ -#define __ARCH_INTERRUPTS_H__ - -/** Mask for an interrupt. */ -#ifdef __ASSEMBLER__ -/* Note: must handle breaking interrupts into high and low words manually. */ -#define INT_MASK(intno) (1 << (intno)) -#else -#define INT_MASK(intno) (1ULL << (intno)) -#endif - - -/** Where a given interrupt executes */ -#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) - -/** Where to store a vector for a given interrupt. */ -#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) - -/** The base address of user-level interrupts. */ -#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) - - -/** Additional synthetic interrupt. */ -#define INT_BREAKPOINT (63) - -#define INT_MEM_ERROR 0 -#define INT_SINGLE_STEP_3 1 -#define INT_SINGLE_STEP_2 2 -#define INT_SINGLE_STEP_1 3 -#define INT_SINGLE_STEP_0 4 -#define INT_IDN_COMPLETE 5 -#define INT_UDN_COMPLETE 6 -#define INT_ITLB_MISS 7 -#define INT_ILL 8 -#define INT_GPV 9 -#define INT_IDN_ACCESS 10 -#define INT_UDN_ACCESS 11 -#define INT_SWINT_3 12 -#define INT_SWINT_2 13 -#define INT_SWINT_1 14 -#define INT_SWINT_0 15 -#define INT_ILL_TRANS 16 -#define INT_UNALIGN_DATA 17 -#define INT_DTLB_MISS 18 -#define INT_DTLB_ACCESS 19 -#define INT_IDN_FIREWALL 20 -#define INT_UDN_FIREWALL 21 -#define INT_TILE_TIMER 22 -#define INT_AUX_TILE_TIMER 23 -#define INT_IDN_TIMER 24 -#define INT_UDN_TIMER 25 -#define INT_IDN_AVAIL 26 -#define INT_UDN_AVAIL 27 -#define INT_IPI_3 28 -#define INT_IPI_2 29 -#define INT_IPI_1 30 -#define INT_IPI_0 31 -#define INT_PERF_COUNT 32 -#define INT_AUX_PERF_COUNT 33 -#define INT_INTCTRL_3 34 -#define INT_INTCTRL_2 35 -#define INT_INTCTRL_1 36 -#define INT_INTCTRL_0 37 -#define INT_BOOT_ACCESS 38 -#define INT_WORLD_ACCESS 39 -#define INT_I_ASID 40 -#define INT_D_ASID 41 -#define INT_DOUBLE_FAULT 42 - -#define NUM_INTERRUPTS 43 - -#ifndef __ASSEMBLER__ -#define QUEUED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - 0) -#define NONQUEUED_INTERRUPTS ( \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - 0) -#define CRITICAL_MASKED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - 0) -#define CRITICAL_UNMASKED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - 0) -#define MASKABLE_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - 0) -#define UNMASKABLE_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - 0) -#define SYNC_INTERRUPTS ( \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - 0) -#define NON_SYNC_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - 0) -#endif /* !__ASSEMBLER__ */ -#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h new file mode 100644 index 00000000000..904538e754d --- /dev/null +++ b/arch/tile/include/arch/mpipe.h @@ -0,0 +1,371 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_MPIPE_H__ +#define __ARCH_MPIPE_H__ + +#include <arch/abi.h> +#include <arch/mpipe_def.h> + +#ifndef __ASSEMBLER__ + +/* + * MMIO Ingress DMA Release Region Address. + * This is a description of the physical addresses used to manipulate ingress + * credit counters. Accesses to this address space should use an address of + * this form and a value like that specified in IDMA_RELEASE_REGION_VAL. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* NotifRing to be released */ + uint_reg_t ring : 8; + /* Bucket to be released */ + uint_reg_t bucket : 13; + /* Enable NotifRing release */ + uint_reg_t ring_enable : 1; + /* Enable Bucket release */ + uint_reg_t bucket_enable : 1; + /* + * This field of the address selects the region (address space) to be + * accessed. For the iDMA release region, this field must be 4. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_1 : 6; + /* This field of the address indexes the 32 entry service domain table. */ + uint_reg_t svc_dom : 5; + /* Reserved. */ + uint_reg_t __reserved_2 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_1 : 6; + uint_reg_t region : 3; + uint_reg_t bucket_enable : 1; + uint_reg_t ring_enable : 1; + uint_reg_t bucket : 13; + uint_reg_t ring : 8; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_IDMA_RELEASE_REGION_ADDR_t; + +/* + * MMIO Ingress DMA Release Region Value - Release NotifRing and/or Bucket. + * Provides release of the associated NotifRing. The address of the MMIO + * operation is described in IDMA_RELEASE_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Number of packets being released. The load balancer's count of + * inflight packets will be decremented by this amount for the associated + * Bucket and/or NotifRing + */ + uint_reg_t count : 16; + /* Reserved. */ + uint_reg_t __reserved : 48; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 48; + uint_reg_t count : 16; +#endif + }; + + uint_reg_t word; +} MPIPE_IDMA_RELEASE_REGION_VAL_t; + +/* + * MMIO Buffer Stack Manager Region Address. + * This MMIO region is used for posting or fetching buffers to/from the + * buffer stack manager. On an MMIO load, this pops a buffer descriptor from + * the top of stack if one is available. On an MMIO store, this pushes a + * buffer to the stack. The value read or written is described in + * BSM_REGION_VAL. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* BufferStack being accessed. */ + uint_reg_t stack : 5; + /* Reserved. */ + uint_reg_t __reserved_1 : 18; + /* + * This field of the address selects the region (address space) to be + * accessed. For the buffer stack manager region, this field must be 6. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 6; + /* This field of the address indexes the 32 entry service domain table. */ + uint_reg_t svc_dom : 5; + /* Reserved. */ + uint_reg_t __reserved_3 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_2 : 6; + uint_reg_t region : 3; + uint_reg_t __reserved_1 : 18; + uint_reg_t stack : 5; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_BSM_REGION_ADDR_t; + +/* + * MMIO Buffer Stack Manager Region Value. + * This MMIO region is used for posting or fetching buffers to/from the + * buffer stack manager. On an MMIO load, this pops a buffer descriptor from + * the top of stack if one is available. On an MMIO store, this pushes a + * buffer to the stack. The address of the MMIO operation is described in + * BSM_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* + * Base virtual address of the buffer. Must be sign extended by consumer. + */ + int_reg_t va : 35; + /* Reserved. */ + uint_reg_t __reserved_1 : 6; + /* + * Index of the buffer stack to which this buffer belongs. Ignored on + * writes since the offset bits specify the stack being accessed. + */ + uint_reg_t stack_idx : 5; + /* Reserved. */ + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; + /* + * Reads as one to indicate that this is a hardware managed buffer. + * Ignored on writes since all buffers on a given stack are the same size. + */ + uint_reg_t hwb : 1; + /* + * Encoded size of buffer (ignored on writes): + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /* + * Valid indication for the buffer. Ignored on writes. + * 0 : Valid buffer descriptor popped from stack. + * 3 : Could not pop a buffer from the stack. Either the stack is empty, + * or the hardware's prefetch buffer is empty for this stack. + */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_1 : 6; + int_reg_t va : 35; + uint_reg_t __reserved_0 : 7; +#endif + }; + + uint_reg_t word; +} MPIPE_BSM_REGION_VAL_t; + +/* + * MMIO Egress DMA Post Region Address. + * Used to post descriptor locations to the eDMA descriptor engine. The + * value to be written is described in EDMA_POST_REGION_VAL + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* eDMA ring being accessed */ + uint_reg_t ring : 6; + /* Reserved. */ + uint_reg_t __reserved_1 : 17; + /* + * This field of the address selects the region (address space) to be + * accessed. For the egress DMA post region, this field must be 5. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 6; + /* This field of the address indexes the 32 entry service domain table. */ + uint_reg_t svc_dom : 5; + /* Reserved. */ + uint_reg_t __reserved_3 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_2 : 6; + uint_reg_t region : 3; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_EDMA_POST_REGION_ADDR_t; + +/* + * MMIO Egress DMA Post Region Value. + * Used to post descriptor locations to the eDMA descriptor engine. The + * address is described in EDMA_POST_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * For writes, this specifies the current ring tail pointer prior to any + * post. For example, to post 1 or more descriptors starting at location + * 23, this would contain 23 (not 24). On writes, this index must be + * masked based on the ring size. The new tail pointer after this post + * is COUNT+RING_IDX (masked by the ring size). + * + * For reads, this provides the hardware descriptor fetcher's head + * pointer. The descriptors prior to the head pointer, however, may not + * yet have been processed so this indicator is only used to determine + * how full the ring is and if software may post more descriptors. + */ + uint_reg_t ring_idx : 16; + /* + * For writes, this specifies number of contiguous descriptors that are + * being posted. Software may post up to RingSize descriptors with a + * single MMIO store. A zero in this field on a write will "wake up" an + * eDMA ring and cause it fetch descriptors regardless of the hardware's + * current view of the state of the tail pointer. + * + * For reads, this field provides a rolling count of the number of + * descriptors that have been completely processed. This may be used by + * software to determine when buffers associated with a descriptor may be + * returned or reused. When the ring's flush bit is cleared by software + * (after having been set by HW or SW), the COUNT will be cleared. + */ + uint_reg_t count : 16; + /* + * For writes, this specifies the generation number of the tail being + * posted. Note that if tail+cnt wraps to the beginning of the ring, the + * eDMA hardware assumes that the descriptors posted at the beginning of + * the ring are also valid so it is okay to post around the wrap point. + * + * For reads, this is the current generation number. Valid descriptors + * will have the inverse of this generation number. + */ + uint_reg_t gen : 1; + /* Reserved. */ + uint_reg_t __reserved : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 31; + uint_reg_t gen : 1; + uint_reg_t count : 16; + uint_reg_t ring_idx : 16; +#endif + }; + + uint_reg_t word; +} MPIPE_EDMA_POST_REGION_VAL_t; + +/* + * Load Balancer Bucket Status Data. + * Read/Write data for load balancer Bucket-Status Table. 4160 entries + * indexed by LBL_INIT_CTL.IDX when LBL_INIT_CTL.STRUCT_SEL is BSTS_TBL + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* NotifRing currently assigned to this bucket. */ + uint_reg_t notifring : 8; + /* Current reference count. */ + uint_reg_t count : 16; + /* Group associated with this bucket. */ + uint_reg_t group : 5; + /* Mode select for this bucket. */ + uint_reg_t mode : 3; + /* Reserved. */ + uint_reg_t __reserved : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 32; + uint_reg_t mode : 3; + uint_reg_t group : 5; + uint_reg_t count : 16; + uint_reg_t notifring : 8; +#endif + }; + + uint_reg_t word; +} MPIPE_LBL_INIT_DAT_BSTS_TBL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_MPIPE_H__) */ diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h new file mode 100644 index 00000000000..84022ac5fe8 --- /dev/null +++ b/arch/tile/include/arch/mpipe_constants.h @@ -0,0 +1,42 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + + +#ifndef __ARCH_MPIPE_CONSTANTS_H__ +#define __ARCH_MPIPE_CONSTANTS_H__ + +#define MPIPE_NUM_CLASSIFIERS 16 +#define MPIPE_CLS_MHZ 1200 + +#define MPIPE_NUM_EDMA_RINGS 64 + +#define MPIPE_NUM_SGMII_MACS 16 +#define MPIPE_NUM_XAUI_MACS 16 +#define MPIPE_NUM_LOOPBACK_CHANNELS 4 +#define MPIPE_NUM_NON_LB_CHANNELS 28 + +#define MPIPE_NUM_IPKT_BLOCKS 1536 + +#define MPIPE_NUM_BUCKETS 4160 + +#define MPIPE_NUM_NOTIF_RINGS 256 + +#define MPIPE_NUM_NOTIF_GROUPS 32 + +#define MPIPE_NUM_TLBS_PER_ASID 16 +#define MPIPE_TLB_IDX_WIDTH 4 + +#define MPIPE_MMIO_NUM_SVC_DOM 32 + +#endif /* __ARCH_MPIPE_CONSTANTS_H__ */ diff --git a/arch/tile/include/arch/mpipe_def.h b/arch/tile/include/arch/mpipe_def.h new file mode 100644 index 00000000000..c3d30217fc6 --- /dev/null +++ b/arch/tile/include/arch/mpipe_def.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_MPIPE_DEF_H__ +#define __ARCH_MPIPE_DEF_H__ +#define MPIPE_MMIO_ADDR__REGION_SHIFT 26 +#define MPIPE_MMIO_ADDR__REGION_VAL_CFG 0x0 +#define MPIPE_MMIO_ADDR__REGION_VAL_IDMA 0x4 +#define MPIPE_MMIO_ADDR__REGION_VAL_EDMA 0x5 +#define MPIPE_MMIO_ADDR__REGION_VAL_BSM 0x6 +#define MPIPE_BSM_REGION_VAL__VA_SHIFT 7 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128 0x0 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256 0x1 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512 0x2 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024 0x3 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664 0x4 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096 0x5 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368 0x6 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384 0x7 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA 0x0 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED 0x1 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK 0x2 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY 0x3 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND 0x7 +#define MPIPE_LBL_NR_STATE__FIRST_WORD 0x2138 +#endif /* !defined(__ARCH_MPIPE_DEF_H__) */ diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h new file mode 100644 index 00000000000..13b3c4300e5 --- /dev/null +++ b/arch/tile/include/arch/mpipe_shm.h @@ -0,0 +1,521 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + + +#ifndef __ARCH_MPIPE_SHM_H__ +#define __ARCH_MPIPE_SHM_H__ + +#include <arch/abi.h> +#include <arch/mpipe_shm_def.h> + +#ifndef __ASSEMBLER__ +/** + * MPIPE eDMA Descriptor. + * The eDMA descriptor is written by software and consumed by hardware. It + * is used to specify the location of egress packet data to be sent out of + * the chip via one of the packet interfaces. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** + * Generation number. Used to indicate a valid descriptor in ring. When + * a new descriptor is written into the ring, software must toggle this + * bit. The net effect is that the GEN bit being written into new + * descriptors toggles each time the ring tail pointer wraps. + */ + uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; + /** Reserved. Must be zero. */ + uint_reg_t r0 : 1; + /** Checksum generation enabled for this transfer. */ + uint_reg_t csum : 1; + /** + * Nothing to be sent. Used, for example, when software has dropped a + * packet but still wishes to return all of the associated buffers. + */ + uint_reg_t ns : 1; + /** + * Notification interrupt will be delivered when packet has been egressed. + */ + uint_reg_t notif : 1; + /** + * Boundary indicator. When 1, this transfer includes the EOP for this + * command. Must be clear on all but the last descriptor for an egress + * packet. + */ + uint_reg_t bound : 1; + /** Reserved. Must be zero. */ + uint_reg_t r1 : 4; + /** + * Number of bytes to be sent for this descriptor. When zero, no data + * will be moved and the buffer descriptor will be ignored. If the + * buffer descriptor indicates that it is chained, the low 7 bits of the + * VA indicate the offset within the first buffer (e.g. 127 bytes is the + * maximum offset into the first buffer). If the size exceeds a single + * buffer, subsequent buffer descriptors will be fetched prior to + * processing the next eDMA descriptor in the ring. + */ + uint_reg_t xfer_size : 14; + /** Reserved. Must be zero. */ + uint_reg_t r2 : 2; + /** + * Destination of checksum relative to CSUM_START relative to the first + * byte moved by this descriptor. Must be zero if CSUM=0 in this + * descriptor. Must be less than XFER_SIZE (e.g. the first byte of the + * CSUM_DEST must be within the span of this descriptor). + */ + uint_reg_t csum_dest : 8; + /** + * Start byte of checksum relative to the first byte moved by this + * descriptor. If this is not the first descriptor for the egress + * packet, CSUM_START is still relative to the first byte in this + * descriptor. Must be zero if CSUM=0 in this descriptor. + */ + uint_reg_t csum_start : 8; + /** + * Initial value for 16-bit 1's compliment checksum if enabled via CSUM. + * Specified in network order. That is, bits[7:0] will be added to the + * byte pointed to by CSUM_START and bits[15:8] will be added to the byte + * pointed to by CSUM_START+1 (with appropriate 1's compliment carries). + * Must be zero if CSUM=0 in this descriptor. + */ + uint_reg_t csum_seed : 16; +#else /* __BIG_ENDIAN__ */ + uint_reg_t csum_seed : 16; + uint_reg_t csum_start : 8; + uint_reg_t csum_dest : 8; + uint_reg_t r2 : 2; + uint_reg_t xfer_size : 14; + uint_reg_t r1 : 4; + uint_reg_t bound : 1; + uint_reg_t notif : 1; + uint_reg_t ns : 1; + uint_reg_t csum : 1; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; + uint_reg_t gen : 1; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** Virtual address. Must be sign extended by consumer. */ + int_reg_t va : 42; + /** Reserved. */ + uint_reg_t __reserved_0 : 6; + /** Index of the buffer stack to which this buffer belongs. */ + uint_reg_t stack_idx : 5; + /** Reserved. */ + uint_reg_t __reserved_1 : 3; + /** + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. + */ + uint_reg_t inst : 2; + /** + * Always set to one by hardware in iDMA packet descriptors. For eDMA, + * indicates whether the buffer will be released to the buffer stack + * manager. When 0, software is responsible for releasing the buffer. + */ + uint_reg_t hwb : 1; + /** + * Encoded size of buffer. Set by the ingress hardware for iDMA packet + * descriptors. For eDMA descriptors, indicates the buffer size if .c + * indicates a chained packet. If an eDMA descriptor is not chained and + * the .hwb bit is not set, this field is ignored and the size is + * specified by the .xfer_size field. + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /** + * Chaining configuration for the buffer. Indicates that an ingress + * packet or egress command is chained across multiple buffers, with each + * buffer's size indicated by the .size field. + */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_1 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_0 : 6; + int_reg_t va : 42; +#endif + + }; + + /** Word access */ + uint_reg_t words[2]; +} MPIPE_EDMA_DESC_t; + +/** + * MPIPE Packet Descriptor. + * The packet descriptor is filled by the mPIPE's classification, + * load-balancing, and buffer management services. Some fields are consumed + * by mPIPE hardware, and others are consumed by Tile software. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** + * Notification ring into which this packet descriptor is written. + * Typically written by load balancer, but can be overridden by + * classification program if NR is asserted. + */ + uint_reg_t notif_ring : 8; + /** Source channel for this packet. Written by mPIPE DMA hardware. */ + uint_reg_t channel : 5; + /** Reserved. */ + uint_reg_t __reserved_0 : 1; + /** + * MAC Error. + * Generated by the MAC interface. Asserted if there was an overrun of + * the MAC's receive FIFO. This condition generally only occurs if the + * mPIPE clock is running too slowly. + */ + uint_reg_t me : 1; + /** + * Truncation Error. + * Written by the iDMA hardware. Asserted if packet was truncated due to + * insufficient space in iPkt buffer + */ + uint_reg_t tr : 1; + /** + * Written by the iDMA hardware. Indicates the number of bytes written + * to Tile memory. In general, this is the actual size of the packet as + * received from the MAC. But if the packet is truncated due to running + * out of buffers or due to the iPkt buffer filling up, then the L2_SIZE + * will be reduced to reflect the actual number of valid bytes written to + * Tile memory. + */ + uint_reg_t l2_size : 14; + /** + * CRC Error. + * Generated by the MAC. Asserted if MAC indicated an L2 CRC error or + * other L2 error (bad length etc.) on the packet. + */ + uint_reg_t ce : 1; + /** + * Cut Through. + * Written by the iDMA hardware. Asserted if packet was not completely + * received before being sent to classifier. L2_Size will indicate + * number of bytes received so far. + */ + uint_reg_t ct : 1; + /** + * Written by the classification program. Used by the load balancer to + * select the ring into which this packet descriptor is written. + */ + uint_reg_t bucket_id : 13; + /** Reserved. */ + uint_reg_t __reserved_1 : 3; + /** + * Checksum. + * Written by classification program. When 1, the checksum engine will + * perform checksum based on the CSUM_SEED, CSUM_START, and CSUM_BYTES + * fields. The result will be placed in CSUM_VAL. + */ + uint_reg_t cs : 1; + /** + * Notification Ring Select. + * Written by the classification program. When 1, the NotifRingIDX is + * set by classification program rather than being set by load balancer. + */ + uint_reg_t nr : 1; + /** + * Written by classification program. Indicates whether packet and + * descriptor should both be dropped, both be delivered, or only the + * descriptor should be delivered. + */ + uint_reg_t dest : 2; + /** + * General Purpose Sequence Number Enable. + * Written by the classification program. When 1, the GP_SQN_SEL field + * contains the sequence number selector and the GP_SQN field will be + * replaced with the associated sequence number. When clear, the GP_SQN + * field is left intact and be used as "Custom" bytes. + */ + uint_reg_t sq : 1; + /** + * TimeStamp Enable. + * Enable TimeStamp insertion. When clear, timestamp field may be filled + * with custom data by classifier. When set, hardware inserts the + * timestamp when the start of packet is received from the MAC. + */ + uint_reg_t ts : 1; + /** + * Packet Sequence Number Enable. + * Enable PacketSQN insertion. When clear, PacketSQN field may be filled + * with custom data by classifier. When set, hardware inserts the packet + * sequence number when the packet descriptor is written to a + * notification ring. + */ + uint_reg_t ps : 1; + /** + * Buffer Error. + * Written by the iDMA hardware. Asserted if iDMA ran out of buffers + * while writing the packet. Software must still return any buffer + * descriptors whose C field indicates a valid descriptor was consumed. + */ + uint_reg_t be : 1; + /** + * Written by the classification program. The associated counter is + * incremented when the packet is sent. + */ + uint_reg_t ctr0 : 5; + /** Reserved. */ + uint_reg_t __reserved_2 : 3; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 3; + uint_reg_t ctr0 : 5; + uint_reg_t be : 1; + uint_reg_t ps : 1; + uint_reg_t ts : 1; + uint_reg_t sq : 1; + uint_reg_t dest : 2; + uint_reg_t nr : 1; + uint_reg_t cs : 1; + uint_reg_t __reserved_1 : 3; + uint_reg_t bucket_id : 13; + uint_reg_t ct : 1; + uint_reg_t ce : 1; + uint_reg_t l2_size : 14; + uint_reg_t tr : 1; + uint_reg_t me : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t channel : 5; + uint_reg_t notif_ring : 8; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. The associated counter is + * incremented when the packet is sent. + */ + uint_reg_t ctr1 : 5; + /** Reserved. */ + uint_reg_t __reserved_3 : 3; + /** + * Written by classification program. Indicates the start byte for + * checksum. Relative to 1st byte received from MAC. + */ + uint_reg_t csum_start : 8; + /** + * Checksum seed written by classification program. Overwritten with + * resultant checksum if CS bit is asserted. The endianness of the CSUM + * value bits when viewed by Tile software match the packet byte order. + * That is, bits[7:0] of the resulting checksum value correspond to + * earlier (more significant) bytes in the packet. To avoid classifier + * software from having to byte swap the CSUM_SEED, the iDMA checksum + * engine byte swaps the classifier's result before seeding the checksum + * calculation. Thus, the CSUM_START byte of packet data is added to + * bits[15:8] of the CSUM_SEED field generated by the classifier. This + * byte swap will be visible to Tile software if the CS bit is clear. + */ + uint_reg_t csum_seed_val : 16; + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom0 : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom0 : 32; + uint_reg_t csum_seed_val : 16; + uint_reg_t csum_start : 8; + uint_reg_t __reserved_3 : 3; + uint_reg_t ctr1 : 5; +#endif + + /* Word 2 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom1 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom1 : 64; +#endif + + /* Word 3 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom2 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom2 : 64; +#endif + + /* Word 4 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom3 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom3 : 64; +#endif + + /* Word 5 */ + +#ifndef __BIG_ENDIAN__ + /** + * Sequence number applied when packet is distributed. Classifier + * selects which sequence number is to be applied by writing the 13-bit + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the + * PACKET_SQN will be reduced to 32 bits. + */ + uint_reg_t gp_sqn : 16; + /** + * Written by notification hardware. The packet sequence number is + * incremented for each packet that wasn't dropped. + */ + uint_reg_t packet_sqn : 48; +#else /* __BIG_ENDIAN__ */ + uint_reg_t packet_sqn : 48; + uint_reg_t gp_sqn : 16; +#endif + + /* Word 6 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by hardware when the start-of-packet is received by the mPIPE + * from the MAC. This is the nanoseconds part of the packet timestamp. + */ + uint_reg_t time_stamp_ns : 32; + /** + * Written by hardware when the start-of-packet is received by the mPIPE + * from the MAC. This is the seconds part of the packet timestamp. + */ + uint_reg_t time_stamp_sec : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t time_stamp_sec : 32; + uint_reg_t time_stamp_ns : 32; +#endif + + /* Word 7 */ + +#ifndef __BIG_ENDIAN__ + /** Virtual address. Must be sign extended by consumer. */ + int_reg_t va : 42; + /** Reserved. */ + uint_reg_t __reserved_4 : 6; + /** Index of the buffer stack to which this buffer belongs. */ + uint_reg_t stack_idx : 5; + /** Reserved. */ + uint_reg_t __reserved_5 : 3; + /** + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. + */ + uint_reg_t inst : 2; + /** + * Always set to one by hardware in iDMA packet descriptors. For eDMA, + * indicates whether the buffer will be released to the buffer stack + * manager. When 0, software is responsible for releasing the buffer. + */ + uint_reg_t hwb : 1; + /** + * Encoded size of buffer. Set by the ingress hardware for iDMA packet + * descriptors. For eDMA descriptors, indicates the buffer size if .c + * indicates a chained packet. If an eDMA descriptor is not chained and + * the .hwb bit is not set, this field is ignored and the size is + * specified by the .xfer_size field. + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /** + * Chaining configuration for the buffer. Indicates that an ingress + * packet or egress command is chained across multiple buffers, with each + * buffer's size indicated by the .size field. + */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_5 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_4 : 6; + int_reg_t va : 42; +#endif + + }; + + /** Word access */ + uint_reg_t words[8]; +} MPIPE_PDESC_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_MPIPE_SHM_H__) */ diff --git a/arch/tile/include/arch/mpipe_shm_def.h b/arch/tile/include/arch/mpipe_shm_def.h new file mode 100644 index 00000000000..6124d39c831 --- /dev/null +++ b/arch/tile/include/arch/mpipe_shm_def.h @@ -0,0 +1,23 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_MPIPE_SHM_DEF_H__ +#define __ARCH_MPIPE_SHM_DEF_H__ +#define MPIPE_EDMA_DESC_WORD1__C_VAL_UNCHAINED 0x0 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_CHAINED 0x1 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY 0x2 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_INVALID 0x3 +#endif /* !defined(__ARCH_MPIPE_SHM_DEF_H__) */ diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index f548efeb2de..2de83e7aff3 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -11,15 +11,11 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ +#ifndef __ARCH_SPR_DEF_H__ +#define __ARCH_SPR_DEF_H__ -/* Include the proper base SPR definition file. */ -#ifdef __tilegx__ -#include <arch/spr_def_64.h> -#else -#include <arch/spr_def_32.h> -#endif +#include <uapi/arch/spr_def.h> -#ifdef __KERNEL__ /* * In addition to including the proper base SPR definition file, depending @@ -60,8 +56,8 @@ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) #define SPR_IPI_EVENT_RESET_K \ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) -#define SPR_IPI_MASK_SET_K \ - _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) #define INT_IPI_K \ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) @@ -110,4 +106,4 @@ #define INT_INTCTRL_K \ _concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,) -#endif /* __KERNEL__ */ +#endif /* __ARCH_SPR_DEF_H__ */ diff --git a/arch/tile/include/arch/trio.h b/arch/tile/include/arch/trio.h new file mode 100644 index 00000000000..c0ddedcae08 --- /dev/null +++ b/arch/tile/include/arch/trio.h @@ -0,0 +1,111 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_H__ +#define __ARCH_TRIO_H__ + +#include <arch/abi.h> +#include <arch/trio_def.h> + +#ifndef __ASSEMBLER__ + +/* + * Map SQ Doorbell Format. + * This describes the format of the write-only doorbell register that exists + * in the last 8-bytes of the MAP_SQ_BASE/LIM range. This register is only + * writable from PCIe space. Writes to this register will not be written to + * Tile memory space and thus no IO VA translation is required if the last + * page of the BASE/LIM range is not otherwise written. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * When written with a 1, the associated MAP_SQ region's doorbell + * interrupt will be triggered once all previous writes are visible to + * Tile software. + */ + uint_reg_t doorbell : 1; + /* + * When written with a 1, the descriptor at the head of the associated + * MAP_SQ's FIFO will be dequeued. + */ + uint_reg_t pop : 1; + /* Reserved. */ + uint_reg_t __reserved : 62; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 62; + uint_reg_t pop : 1; + uint_reg_t doorbell : 1; +#endif + }; + + uint_reg_t word; +} TRIO_MAP_SQ_DOORBELL_FMT_t; + + +/* + * Tile PIO Region Configuration - CFG Address Format. + * This register describes the address format for PIO accesses when the + * associated region is setup with TYPE=CFG. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Register Address (full byte address). */ + uint_reg_t reg_addr : 12; + /* Function Number */ + uint_reg_t fn : 3; + /* Device Number */ + uint_reg_t dev : 5; + /* BUS Number */ + uint_reg_t bus : 8; + /* Config Type: 0 for access to directly-attached device. 1 otherwise. */ + uint_reg_t type : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* + * MAC select. This must match the configuration in + * TILE_PIO_REGION_SETUP.MAC. + */ + uint_reg_t mac : 2; + /* Reserved. */ + uint_reg_t __reserved_1 : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 32; + uint_reg_t mac : 2; + uint_reg_t __reserved_0 : 1; + uint_reg_t type : 1; + uint_reg_t bus : 8; + uint_reg_t dev : 5; + uint_reg_t fn : 3; + uint_reg_t reg_addr : 12; +#endif + }; + + uint_reg_t word; +} TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_H__) */ diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h new file mode 100644 index 00000000000..85647e91a45 --- /dev/null +++ b/arch/tile/include/arch/trio_constants.h @@ -0,0 +1,36 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + + +#ifndef __ARCH_TRIO_CONSTANTS_H__ +#define __ARCH_TRIO_CONSTANTS_H__ + +#define TRIO_NUM_ASIDS 32 +#define TRIO_NUM_TLBS_PER_ASID 16 + +#define TRIO_NUM_TPIO_REGIONS 8 +#define TRIO_LOG2_NUM_TPIO_REGIONS 3 + +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 +#define TRIO_NUM_MAP_SQ_REGIONS 8 +#define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 + +#define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 + +#define TRIO_NUM_PUSH_DMA_RINGS 64 + +#define TRIO_NUM_PULL_DMA_RINGS 64 + +#endif /* __ARCH_TRIO_CONSTANTS_H__ */ diff --git a/arch/tile/include/arch/trio_def.h b/arch/tile/include/arch/trio_def.h new file mode 100644 index 00000000000..e80500317dc --- /dev/null +++ b/arch/tile/include/arch/trio_def.h @@ -0,0 +1,41 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_DEF_H__ +#define __ARCH_TRIO_DEF_H__ +#define TRIO_CFG_REGION_ADDR__REG_SHIFT 0 +#define TRIO_CFG_REGION_ADDR__INTFC_SHIFT 16 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_TRIO 0x0 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE 0x1 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD 0x2 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED 0x3 +#define TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT 18 +#define TRIO_CFG_REGION_ADDR__PROT_SHIFT 20 +#define TRIO_PIO_REGIONS_ADDR__REGION_SHIFT 32 +#define TRIO_MAP_MEM_REG_INT0 0x1000000000 +#define TRIO_MAP_MEM_REG_INT1 0x1000000008 +#define TRIO_MAP_MEM_REG_INT2 0x1000000010 +#define TRIO_MAP_MEM_REG_INT3 0x1000000018 +#define TRIO_MAP_MEM_REG_INT4 0x1000000020 +#define TRIO_MAP_MEM_REG_INT5 0x1000000028 +#define TRIO_MAP_MEM_REG_INT6 0x1000000030 +#define TRIO_MAP_MEM_REG_INT7 0x1000000038 +#define TRIO_MAP_MEM_LIM__ADDR_SHIFT 12 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED 0x0 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT 0x1 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD 0x2 +#define TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT 30 +#endif /* !defined(__ARCH_TRIO_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_intfc.h b/arch/tile/include/arch/trio_pcie_intfc.h new file mode 100644 index 00000000000..0487fdb9d58 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_intfc.h @@ -0,0 +1,229 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_INTFC_H__ +#define __ARCH_TRIO_PCIE_INTFC_H__ + +#include <arch/abi.h> +#include <arch/trio_pcie_intfc_def.h> + +#ifndef __ASSEMBLER__ + +/* + * Port Configuration. + * Configuration of the PCIe Port + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Provides the state of the strapping pins for this port. */ + uint_reg_t strap_state : 3; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* + * When 1, the device type will be overridden using OVD_DEV_TYPE_VAL. + * When 0, the device type is determined based on the STRAP_STATE. + */ + uint_reg_t ovd_dev_type : 1; + /* Provides the device type when OVD_DEV_TYPE is 1. */ + uint_reg_t ovd_dev_type_val : 4; + /* Determines how link is trained. */ + uint_reg_t train_mode : 2; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* + * For PCIe, used to flip physical RX lanes that were not properly wired. + * This is not the same as lane reversal which is handled automatically + * during link training. When 0, RX Lane0 must be wired to the link + * partner (either to its Lane0 or it's LaneN). When RX_LANE_FLIP is 1, + * the highest numbered lane for this port becomes Lane0 and Lane0 does + * NOT have to be wired to the link partner. + */ + uint_reg_t rx_lane_flip : 1; + /* + * For PCIe, used to flip physical TX lanes that were not properly wired. + * This is not the same as lane reversal which is handled automatically + * during link training. When 0, TX Lane0 must be wired to the link + * partner (either to its Lane0 or it's LaneN). When TX_LANE_FLIP is 1, + * the highest numbered lane for this port becomes Lane0 and Lane0 does + * NOT have to be wired to the link partner. + */ + uint_reg_t tx_lane_flip : 1; + /* + * For StreamIO port, configures the width of the port when TRAIN_MODE is + * not STRAP. + */ + uint_reg_t stream_width : 2; + /* + * For StreamIO port, configures the rate of the port when TRAIN_MODE is + * not STRAP. + */ + uint_reg_t stream_rate : 2; + /* Reserved. */ + uint_reg_t __reserved_2 : 46; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 46; + uint_reg_t stream_rate : 2; + uint_reg_t stream_width : 2; + uint_reg_t tx_lane_flip : 1; + uint_reg_t rx_lane_flip : 1; + uint_reg_t __reserved_1 : 1; + uint_reg_t train_mode : 2; + uint_reg_t ovd_dev_type_val : 4; + uint_reg_t ovd_dev_type : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t strap_state : 3; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_PORT_CONFIG_t; + +/* + * Port Status. + * Status of the PCIe Port. This register applies to the StreamIO port when + * StreamIO is enabled. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Indicates the DL state of the port. When 1, the port is up and ready + * to receive traffic. + */ + uint_reg_t dl_up : 1; + /* + * Indicates the number of times the link has gone down. Clears on read. + */ + uint_reg_t dl_down_cnt : 7; + /* Indicates the SERDES PLL has spun up and is providing a valid clock. */ + uint_reg_t clock_ready : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* Device revision ID. */ + uint_reg_t device_rev : 8; + /* Link state (PCIe). */ + uint_reg_t ltssm_state : 6; + /* Link power management state (PCIe). */ + uint_reg_t pm_state : 3; + /* Reserved. */ + uint_reg_t __reserved_1 : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 31; + uint_reg_t pm_state : 3; + uint_reg_t ltssm_state : 6; + uint_reg_t device_rev : 8; + uint_reg_t __reserved_0 : 7; + uint_reg_t clock_ready : 1; + uint_reg_t dl_down_cnt : 7; + uint_reg_t dl_up : 1; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_PORT_STATUS_t; + +/* + * Transmit FIFO Control. + * Contains TX FIFO thresholds. These registers are for diagnostics purposes + * only. Changing these values causes undefined behavior. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Almost-Empty level for TX0 data. Typically set to at least + * roundup(38.0*M/N) where N=tclk frequency and M=MAC symbol rate in MHz + * for a x4 port (250MHz). + */ + uint_reg_t tx0_data_ae_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* Almost-Empty level for TX1 data. */ + uint_reg_t tx1_data_ae_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* Almost-Full level for TX0 data. */ + uint_reg_t tx0_data_af_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_2 : 1; + /* Almost-Full level for TX1 data. */ + uint_reg_t tx1_data_af_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_3 : 1; + /* Almost-Full level for TX0 info. */ + uint_reg_t tx0_info_af_lvl : 5; + /* Reserved. */ + uint_reg_t __reserved_4 : 3; + /* Almost-Full level for TX1 info. */ + uint_reg_t tx1_info_af_lvl : 5; + /* Reserved. */ + uint_reg_t __reserved_5 : 3; + /* + * This register provides performance adjustment for high bandwidth + * flows. The MAC will assert almost-full to TRIO if non-posted credits + * fall below this level. Note that setting this larger than the initial + * PORT_CREDIT.NPH value will cause READS to never be sent. If the + * initial credit value from the link partner is smaller than this value + * when the link comes up, the value will be reset to the initial credit + * value to prevent lockup. + */ + uint_reg_t min_np_credits : 8; + /* + * This register provides performance adjustment for high bandwidth + * flows. The MAC will assert almost-full to TRIO if posted credits fall + * below this level. Note that setting this larger than the initial + * PORT_CREDIT.PH value will cause WRITES to never be sent. If the + * initial credit value from the link partner is smaller than this value + * when the link comes up, the value will be reset to the initial credit + * value to prevent lockup. + */ + uint_reg_t min_p_credits : 8; +#else /* __BIG_ENDIAN__ */ + uint_reg_t min_p_credits : 8; + uint_reg_t min_np_credits : 8; + uint_reg_t __reserved_5 : 3; + uint_reg_t tx1_info_af_lvl : 5; + uint_reg_t __reserved_4 : 3; + uint_reg_t tx0_info_af_lvl : 5; + uint_reg_t __reserved_3 : 1; + uint_reg_t tx1_data_af_lvl : 7; + uint_reg_t __reserved_2 : 1; + uint_reg_t tx0_data_af_lvl : 7; + uint_reg_t __reserved_1 : 1; + uint_reg_t tx1_data_ae_lvl : 7; + uint_reg_t __reserved_0 : 1; + uint_reg_t tx0_data_ae_lvl : 7; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_TX_FIFO_CTL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_intfc_def.h b/arch/tile/include/arch/trio_pcie_intfc_def.h new file mode 100644 index 00000000000..d3fd6781fb2 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_intfc_def.h @@ -0,0 +1,32 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_INTFC_DEF_H__ +#define __ARCH_TRIO_PCIE_INTFC_DEF_H__ +#define TRIO_PCIE_INTFC_MAC_INT_STS 0x0000 +#define TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK 0xf000 +#define TRIO_PCIE_INTFC_PORT_CONFIG 0x0018 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_DISABLED 0x0 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT 0x1 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC 0x2 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 0x3 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1 0x4 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_XLINK 0x5 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X1 0x6 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X4 0x7 +#define TRIO_PCIE_INTFC_PORT_STATUS 0x0020 +#define TRIO_PCIE_INTFC_TX_FIFO_CTL 0x0050 +#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_rc.h b/arch/tile/include/arch/trio_pcie_rc.h new file mode 100644 index 00000000000..6a25d0aca85 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_rc.h @@ -0,0 +1,156 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_RC_H__ +#define __ARCH_TRIO_PCIE_RC_H__ + +#include <arch/abi.h> +#include <arch/trio_pcie_rc_def.h> + +#ifndef __ASSEMBLER__ + +/* Device Capabilities Register. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Max_Payload_Size Supported, writablethrough the MAC_STANDARD interface + */ + uint_reg_t mps_sup : 3; + /* + * This field is writable through the MAC_STANDARD interface. However, + * Phantom Function is not supported. Therefore, the application must + * not write any value other than 0x0 to this field. + */ + uint_reg_t phantom_function_supported : 2; + /* This bit is writable through the MAC_STANDARD interface. */ + uint_reg_t ext_tag_field_supported : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* Endpoint L1 Acceptable Latency Must be 0x0 for non-Endpoint devices. */ + uint_reg_t l1_lat : 3; + /* + * Undefined since PCI Express 1.1 (Was Attention Button Present for PCI + * Express 1.0a) + */ + uint_reg_t r1 : 1; + /* + * Undefined since PCI Express 1.1 (Was Attention Indicator Present for + * PCI Express 1.0a) + */ + uint_reg_t r2 : 1; + /* + * Undefined since PCI Express 1.1 (Was Power Indicator Present for PCI + * Express 1.0a) + */ + uint_reg_t r3 : 1; + /* + * Role-Based Error Reporting, writable through the MAC_STANDARD + * interface. Required to be set for device compliant to 1.1 spec and + * later. + */ + uint_reg_t rer : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 2; + /* Captured Slot Power Limit Value Upstream port only. */ + uint_reg_t slot_pwr_lim : 8; + /* Captured Slot Power Limit Scale Upstream port only. */ + uint_reg_t slot_pwr_scale : 2; + /* Reserved. */ + uint_reg_t __reserved_2 : 4; + /* Endpoint L0s Acceptable LatencyMust be 0x0 for non-Endpoint devices. */ + uint_reg_t l0s_lat : 1; + /* Reserved. */ + uint_reg_t __reserved_3 : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 31; + uint_reg_t l0s_lat : 1; + uint_reg_t __reserved_2 : 4; + uint_reg_t slot_pwr_scale : 2; + uint_reg_t slot_pwr_lim : 8; + uint_reg_t __reserved_1 : 2; + uint_reg_t rer : 1; + uint_reg_t r3 : 1; + uint_reg_t r2 : 1; + uint_reg_t r1 : 1; + uint_reg_t l1_lat : 3; + uint_reg_t __reserved_0 : 3; + uint_reg_t ext_tag_field_supported : 1; + uint_reg_t phantom_function_supported : 2; + uint_reg_t mps_sup : 3; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_RC_DEVICE_CAP_t; + +/* Device Control Register. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Correctable Error Reporting Enable */ + uint_reg_t cor_err_ena : 1; + /* Non-Fatal Error Reporting Enable */ + uint_reg_t nf_err_ena : 1; + /* Fatal Error Reporting Enable */ + uint_reg_t fatal_err_ena : 1; + /* Unsupported Request Reporting Enable */ + uint_reg_t ur_ena : 1; + /* Relaxed orderring enable */ + uint_reg_t ro_ena : 1; + /* Max Payload Size */ + uint_reg_t max_payload_size : 3; + /* Extended Tag Field Enable */ + uint_reg_t ext_tag : 1; + /* Phantom Function Enable */ + uint_reg_t ph_fn_ena : 1; + /* AUX Power PM Enable */ + uint_reg_t aux_pm_ena : 1; + /* Enable NoSnoop */ + uint_reg_t no_snoop : 1; + /* Max read request size */ + uint_reg_t max_read_req_sz : 3; + /* Reserved. */ + uint_reg_t __reserved : 49; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 49; + uint_reg_t max_read_req_sz : 3; + uint_reg_t no_snoop : 1; + uint_reg_t aux_pm_ena : 1; + uint_reg_t ph_fn_ena : 1; + uint_reg_t ext_tag : 1; + uint_reg_t max_payload_size : 3; + uint_reg_t ro_ena : 1; + uint_reg_t ur_ena : 1; + uint_reg_t fatal_err_ena : 1; + uint_reg_t nf_err_ena : 1; + uint_reg_t cor_err_ena : 1; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_RC_DEVICE_CONTROL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_PCIE_RC_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_rc_def.h b/arch/tile/include/arch/trio_pcie_rc_def.h new file mode 100644 index 00000000000..74081a65b6f --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_rc_def.h @@ -0,0 +1,24 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_RC_DEF_H__ +#define __ARCH_TRIO_PCIE_RC_DEF_H__ +#define TRIO_PCIE_RC_DEVICE_CAP 0x0074 +#define TRIO_PCIE_RC_DEVICE_CONTROL 0x0078 +#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID 0x0000 +#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT 16 +#define TRIO_PCIE_RC_REVISION_ID 0x0008 +#endif /* !defined(__ARCH_TRIO_PCIE_RC_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_shm.h b/arch/tile/include/arch/trio_shm.h new file mode 100644 index 00000000000..3382e38245a --- /dev/null +++ b/arch/tile/include/arch/trio_shm.h @@ -0,0 +1,125 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + + +#ifndef __ARCH_TRIO_SHM_H__ +#define __ARCH_TRIO_SHM_H__ + +#include <arch/abi.h> +#include <arch/trio_shm_def.h> + +#ifndef __ASSEMBLER__ +/** + * TRIO DMA Descriptor. + * The TRIO DMA descriptor is written by software and consumed by hardware. + * It is used to specify the location of transaction data in the IO and Tile + * domains. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** Tile side virtual address. */ + int_reg_t va : 42; + /** + * Encoded size of buffer used on push DMA when C=1: + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t bsz : 3; + /** + * Chaining designation. Always zero for pull DMA + * 0 : Unchained buffer pointer + * 1 : Chained buffer pointer. Next buffer descriptor (e.g. VA) stored + * in 1st 8-bytes in buffer. For chained buffers, first 8-bytes of each + * buffer contain the next buffer descriptor formatted exactly like a PDE + * buffer descriptor. This allows a chained PDE buffer to be sent using + * push DMA. + */ + uint_reg_t c : 1; + /** + * Notification interrupt will be delivered when the transaction has + * completed (all data has been read from or written to the Tile-side + * buffer). + */ + uint_reg_t notif : 1; + /** + * When 0, the XSIZE field specifies the total byte count for the + * transaction. When 1, the XSIZE field is encoded as 2^(N+14) for N in + * {0..6}: + * 0 = 16KB + * 1 = 32KB + * 2 = 64KB + * 3 = 128KB + * 4 = 256KB + * 5 = 512KB + * 6 = 1MB + * All other encodings of the XSIZE field are reserved when SMOD=1 + */ + uint_reg_t smod : 1; + /** + * Total number of bytes to move for this transaction. When SMOD=1, + * this field is encoded - see SMOD description. + */ + uint_reg_t xsize : 14; + /** Reserved. */ + uint_reg_t __reserved_0 : 1; + /** + * Generation number. Used to indicate a valid descriptor in ring. When + * a new descriptor is written into the ring, software must toggle this + * bit. The net effect is that the GEN bit being written into new + * descriptors toggles each time the ring tail pointer wraps. + */ + uint_reg_t gen : 1; +#else /* __BIG_ENDIAN__ */ + uint_reg_t gen : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t xsize : 14; + uint_reg_t smod : 1; + uint_reg_t notif : 1; + uint_reg_t c : 1; + uint_reg_t bsz : 3; + int_reg_t va : 42; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** IO-side address */ + uint_reg_t io_address : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t io_address : 64; +#endif + + }; + + /** Word access */ + uint_reg_t words[2]; +} TRIO_DMA_DESC_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_SHM_H__) */ diff --git a/arch/tile/include/arch/trio_shm_def.h b/arch/tile/include/arch/trio_shm_def.h new file mode 100644 index 00000000000..72a59c88b06 --- /dev/null +++ b/arch/tile/include/arch/trio_shm_def.h @@ -0,0 +1,19 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_SHM_DEF_H__ +#define __ARCH_TRIO_SHM_DEF_H__ +#endif /* !defined(__ARCH_TRIO_SHM_DEF_H__) */ diff --git a/arch/tile/include/arch/uart.h b/arch/tile/include/arch/uart.h new file mode 100644 index 00000000000..07966970ada --- /dev/null +++ b/arch/tile/include/arch/uart.h @@ -0,0 +1,300 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_UART_H__ +#define __ARCH_UART_H__ + +#include <arch/abi.h> +#include <arch/uart_def.h> + +#ifndef __ASSEMBLER__ + +/* Divisor. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Baud Rate Divisor. Desired_baud_rate = REF_CLK frequency / (baud * + * 16). + * Note: REF_CLK is always 125 MHz, the default + * divisor = 68, baud rate = 125M/(68*16) = 115200 baud. + */ + uint_reg_t divisor : 12; + /* Reserved. */ + uint_reg_t __reserved : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 52; + uint_reg_t divisor : 12; +#endif + }; + + uint_reg_t word; +} UART_DIVISOR_t; + +/* FIFO Count. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * n: n active entries in the receive FIFO (max is 2**8). Each entry has + * 8 bits. + * 0: no active entry in the receive FIFO (that is empty). + */ + uint_reg_t rfifo_count : 9; + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* + * n: n active entries in the transmit FIFO (max is 2**8). Each entry has + * 8 bits. + * 0: no active entry in the transmit FIFO (that is empty). + */ + uint_reg_t tfifo_count : 9; + /* Reserved. */ + uint_reg_t __reserved_1 : 7; + /* + * n: n active entries in the write FIFO (max is 2**2). Each entry has 8 + * bits. + * 0: no active entry in the write FIFO (that is empty). + */ + uint_reg_t wfifo_count : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 29; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 29; + uint_reg_t wfifo_count : 3; + uint_reg_t __reserved_1 : 7; + uint_reg_t tfifo_count : 9; + uint_reg_t __reserved_0 : 7; + uint_reg_t rfifo_count : 9; +#endif + }; + + uint_reg_t word; +} UART_FIFO_COUNT_t; + +/* FLAG. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* 1: receive FIFO is empty */ + uint_reg_t rfifo_empty : 1; + /* 1: write FIFO is empty. */ + uint_reg_t wfifo_empty : 1; + /* 1: transmit FIFO is empty. */ + uint_reg_t tfifo_empty : 1; + /* 1: receive FIFO is full. */ + uint_reg_t rfifo_full : 1; + /* 1: write FIFO is full. */ + uint_reg_t wfifo_full : 1; + /* 1: transmit FIFO is full. */ + uint_reg_t tfifo_full : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 57; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 57; + uint_reg_t tfifo_full : 1; + uint_reg_t wfifo_full : 1; + uint_reg_t rfifo_full : 1; + uint_reg_t tfifo_empty : 1; + uint_reg_t wfifo_empty : 1; + uint_reg_t rfifo_empty : 1; + uint_reg_t __reserved_0 : 1; +#endif + }; + + uint_reg_t word; +} UART_FLAG_t; + +/* + * Interrupt Vector Mask. + * Each bit in this register corresponds to a specific interrupt. When set, + * the associated interrupt will not be dispatched. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Read data FIFO read and no data available */ + uint_reg_t rdat_err : 1; + /* Write FIFO was written but it was full */ + uint_reg_t wdat_err : 1; + /* Stop bit not found when current data was received */ + uint_reg_t frame_err : 1; + /* Parity error was detected when current data was received */ + uint_reg_t parity_err : 1; + /* Data was received but the receive FIFO was full */ + uint_reg_t rfifo_overflow : 1; + /* + * An almost full event is reached when data is to be written to the + * receive FIFO, and the receive FIFO has more than or equal to + * BUFFER_THRESHOLD.RFIFO_AFULL bytes. + */ + uint_reg_t rfifo_afull : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* An entry in the transmit FIFO was popped */ + uint_reg_t tfifo_re : 1; + /* An entry has been pushed into the receive FIFO */ + uint_reg_t rfifo_we : 1; + /* An entry of the write FIFO has been popped */ + uint_reg_t wfifo_re : 1; + /* Rshim read receive FIFO in protocol mode */ + uint_reg_t rfifo_err : 1; + /* + * An almost empty event is reached when data is to be read from the + * transmit FIFO, and the transmit FIFO has less than or equal to + * BUFFER_THRESHOLD.TFIFO_AEMPTY bytes. + */ + uint_reg_t tfifo_aempty : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 52; + uint_reg_t tfifo_aempty : 1; + uint_reg_t rfifo_err : 1; + uint_reg_t wfifo_re : 1; + uint_reg_t rfifo_we : 1; + uint_reg_t tfifo_re : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t rfifo_afull : 1; + uint_reg_t rfifo_overflow : 1; + uint_reg_t parity_err : 1; + uint_reg_t frame_err : 1; + uint_reg_t wdat_err : 1; + uint_reg_t rdat_err : 1; +#endif + }; + + uint_reg_t word; +} UART_INTERRUPT_MASK_t; + +/* + * Interrupt vector, write-one-to-clear. + * Each bit in this register corresponds to a specific interrupt. Hardware + * sets the bit when the associated condition has occurred. Writing a 1 + * clears the status bit. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Read data FIFO read and no data available */ + uint_reg_t rdat_err : 1; + /* Write FIFO was written but it was full */ + uint_reg_t wdat_err : 1; + /* Stop bit not found when current data was received */ + uint_reg_t frame_err : 1; + /* Parity error was detected when current data was received */ + uint_reg_t parity_err : 1; + /* Data was received but the receive FIFO was full */ + uint_reg_t rfifo_overflow : 1; + /* + * Data was received and the receive FIFO is now almost full (more than + * BUFFER_THRESHOLD.RFIFO_AFULL bytes in it) + */ + uint_reg_t rfifo_afull : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* An entry in the transmit FIFO was popped */ + uint_reg_t tfifo_re : 1; + /* An entry has been pushed into the receive FIFO */ + uint_reg_t rfifo_we : 1; + /* An entry of the write FIFO has been popped */ + uint_reg_t wfifo_re : 1; + /* Rshim read receive FIFO in protocol mode */ + uint_reg_t rfifo_err : 1; + /* + * Data was read from the transmit FIFO and now it is almost empty (less + * than or equal to BUFFER_THRESHOLD.TFIFO_AEMPTY bytes in it). + */ + uint_reg_t tfifo_aempty : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 52; + uint_reg_t tfifo_aempty : 1; + uint_reg_t rfifo_err : 1; + uint_reg_t wfifo_re : 1; + uint_reg_t rfifo_we : 1; + uint_reg_t tfifo_re : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t rfifo_afull : 1; + uint_reg_t rfifo_overflow : 1; + uint_reg_t parity_err : 1; + uint_reg_t frame_err : 1; + uint_reg_t wdat_err : 1; + uint_reg_t rdat_err : 1; +#endif + }; + + uint_reg_t word; +} UART_INTERRUPT_STATUS_t; + +/* Type. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Number of stop bits, rx and tx */ + uint_reg_t sbits : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* Data word size, rx and tx */ + uint_reg_t dbits : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* Parity selection, rx and tx */ + uint_reg_t ptype : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 57; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 57; + uint_reg_t ptype : 3; + uint_reg_t __reserved_1 : 1; + uint_reg_t dbits : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t sbits : 1; +#endif + }; + + uint_reg_t word; +} UART_TYPE_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_UART_H__) */ diff --git a/arch/tile/include/arch/uart_def.h b/arch/tile/include/arch/uart_def.h new file mode 100644 index 00000000000..42bcaf53537 --- /dev/null +++ b/arch/tile/include/arch/uart_def.h @@ -0,0 +1,120 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_UART_DEF_H__ +#define __ARCH_UART_DEF_H__ +#define UART_DIVISOR 0x0158 +#define UART_FIFO_COUNT 0x0110 +#define UART_FLAG 0x0108 +#define UART_INTERRUPT_MASK 0x0208 +#define UART_INTERRUPT_MASK__RDAT_ERR_SHIFT 0 +#define UART_INTERRUPT_MASK__RDAT_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__RDAT_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RDAT_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__RDAT_ERR_MASK 0x1 +#define UART_INTERRUPT_MASK__RDAT_ERR_FIELD 0,0 +#define UART_INTERRUPT_MASK__WDAT_ERR_SHIFT 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__WDAT_ERR_MASK 0x2 +#define UART_INTERRUPT_MASK__WDAT_ERR_FIELD 1,1 +#define UART_INTERRUPT_MASK__FRAME_ERR_SHIFT 2 +#define UART_INTERRUPT_MASK__FRAME_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__FRAME_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__FRAME_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__FRAME_ERR_MASK 0x4 +#define UART_INTERRUPT_MASK__FRAME_ERR_FIELD 2,2 +#define UART_INTERRUPT_MASK__PARITY_ERR_SHIFT 3 +#define UART_INTERRUPT_MASK__PARITY_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__PARITY_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__PARITY_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__PARITY_ERR_MASK 0x8 +#define UART_INTERRUPT_MASK__PARITY_ERR_FIELD 3,3 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_SHIFT 4 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_MASK 0x10 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_FIELD 4,4 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_SHIFT 5 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_MASK 0x20 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_FIELD 5,5 +#define UART_INTERRUPT_MASK__TFIFO_RE_SHIFT 7 +#define UART_INTERRUPT_MASK__TFIFO_RE_WIDTH 1 +#define UART_INTERRUPT_MASK__TFIFO_RE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__TFIFO_RE_RMASK 0x1 +#define UART_INTERRUPT_MASK__TFIFO_RE_MASK 0x80 +#define UART_INTERRUPT_MASK__TFIFO_RE_FIELD 7,7 +#define UART_INTERRUPT_MASK__RFIFO_WE_SHIFT 8 +#define UART_INTERRUPT_MASK__RFIFO_WE_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_WE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_WE_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_WE_MASK 0x100 +#define UART_INTERRUPT_MASK__RFIFO_WE_FIELD 8,8 +#define UART_INTERRUPT_MASK__WFIFO_RE_SHIFT 9 +#define UART_INTERRUPT_MASK__WFIFO_RE_WIDTH 1 +#define UART_INTERRUPT_MASK__WFIFO_RE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__WFIFO_RE_RMASK 0x1 +#define UART_INTERRUPT_MASK__WFIFO_RE_MASK 0x200 +#define UART_INTERRUPT_MASK__WFIFO_RE_FIELD 9,9 +#define UART_INTERRUPT_MASK__RFIFO_ERR_SHIFT 10 +#define UART_INTERRUPT_MASK__RFIFO_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_MASK 0x400 +#define UART_INTERRUPT_MASK__RFIFO_ERR_FIELD 10,10 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_SHIFT 11 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_WIDTH 1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RESET_VAL 1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RMASK 0x1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_MASK 0x800 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_FIELD 11,11 +#define UART_INTERRUPT_STATUS 0x0200 +#define UART_RECEIVE_DATA 0x0148 +#define UART_TRANSMIT_DATA 0x0140 +#define UART_TYPE 0x0160 +#define UART_TYPE__SBITS_SHIFT 0 +#define UART_TYPE__SBITS_WIDTH 1 +#define UART_TYPE__SBITS_RESET_VAL 1 +#define UART_TYPE__SBITS_RMASK 0x1 +#define UART_TYPE__SBITS_MASK 0x1 +#define UART_TYPE__SBITS_FIELD 0,0 +#define UART_TYPE__SBITS_VAL_ONE_SBITS 0x0 +#define UART_TYPE__SBITS_VAL_TWO_SBITS 0x1 +#define UART_TYPE__DBITS_SHIFT 2 +#define UART_TYPE__DBITS_WIDTH 1 +#define UART_TYPE__DBITS_RESET_VAL 0 +#define UART_TYPE__DBITS_RMASK 0x1 +#define UART_TYPE__DBITS_MASK 0x4 +#define UART_TYPE__DBITS_FIELD 2,2 +#define UART_TYPE__DBITS_VAL_EIGHT_DBITS 0x0 +#define UART_TYPE__DBITS_VAL_SEVEN_DBITS 0x1 +#define UART_TYPE__PTYPE_SHIFT 4 +#define UART_TYPE__PTYPE_WIDTH 3 +#define UART_TYPE__PTYPE_RESET_VAL 3 +#define UART_TYPE__PTYPE_RMASK 0x7 +#define UART_TYPE__PTYPE_MASK 0x70 +#define UART_TYPE__PTYPE_FIELD 4,6 +#define UART_TYPE__PTYPE_VAL_NONE 0x0 +#define UART_TYPE__PTYPE_VAL_MARK 0x1 +#define UART_TYPE__PTYPE_VAL_SPACE 0x2 +#define UART_TYPE__PTYPE_VAL_EVEN 0x3 +#define UART_TYPE__PTYPE_VAL_ODD 0x4 +#endif /* !defined(__ARCH_UART_DEF_H__) */ diff --git a/arch/tile/include/arch/usb_host.h b/arch/tile/include/arch/usb_host.h new file mode 100644 index 00000000000..d09f3268396 --- /dev/null +++ b/arch/tile/include/arch/usb_host.h @@ -0,0 +1,26 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_USB_HOST_H__ +#define __ARCH_USB_HOST_H__ + +#include <arch/abi.h> +#include <arch/usb_host_def.h> + +#ifndef __ASSEMBLER__ +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_USB_HOST_H__) */ diff --git a/arch/tile/include/arch/usb_host_def.h b/arch/tile/include/arch/usb_host_def.h new file mode 100644 index 00000000000..aeed7753e8e --- /dev/null +++ b/arch/tile/include/arch/usb_host_def.h @@ -0,0 +1,19 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_USB_HOST_DEF_H__ +#define __ARCH_USB_HOST_DEF_H__ +#endif /* !defined(__ARCH_USB_HOST_DEF_H__) */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 0bb42642343..0aa5675e702 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -1,33 +1,32 @@ -include include/asm-generic/Kbuild.asm header-y += ../arch/ -header-y += ucontext.h -header-y += hardwall.h - generic-y += bug.h generic-y += bugs.h +generic-y += clkdev.h generic-y += cputime.h -generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h +generic-y += exec.h generic-y += fb.h generic-y += fcntl.h +generic-y += hash.h +generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h -generic-y += ipc.h generic-y += ipcbuf.h generic-y += irq_regs.h -generic-y += kdebug.h generic-y += local.h -generic-y += module.h +generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h generic-y += parport.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sembuf.h @@ -39,6 +38,6 @@ generic-y += sockios.h generic-y += statfs.h generic-y += termbits.h generic-y += termios.h +generic-y += trace_clock.h generic-y += types.h -generic-y += ucontext.h generic-y += xor.h diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 921dbeb8a70..70979846076 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -17,10 +17,12 @@ #ifndef _ASM_TILE_ATOMIC_H #define _ASM_TILE_ATOMIC_H +#include <asm/cmpxchg.h> + #ifndef __ASSEMBLY__ #include <linux/compiler.h> -#include <asm/system.h> +#include <linux/types.h> #define ATOMIC_INIT(i) { (i) } @@ -112,6 +114,32 @@ static inline int atomic_read(const atomic_t *v) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) /** + * atomic_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline int atomic_xchg(atomic_t *v, int n) +{ + return xchg(&v->counter, n); +} + +/** + * atomic_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline int atomic_cmpxchg(atomic_t *v, int o, int n) +{ + return cmpxchg(&v->counter, o, n); +} + +/** * atomic_add_negative - add and test if negative * @v: pointer of type atomic_t * @i: integer value to add @@ -121,54 +149,6 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -/* Nonexistent functions intended to cause link errors. */ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); - -#define xchg(ptr, x) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((x)-(x)))(x)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((x)-(x)))(x)); \ - break; \ - default: \ - __xchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define cmpxchg(ptr, o, n) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((o)-(o)))(o), \ - (u32)(typeof((n)-(n)))(n)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((o)-(o)))(o), \ - (u64)(typeof((n)-(n)))(n)); \ - break; \ - default: \ - __cmpxchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define tas(ptr) (xchg((ptr), 1)) - #endif /* __ASSEMBLY__ */ #ifndef __tilegx__ @@ -177,4 +157,52 @@ extern unsigned long __cmpxchg_called_with_bad_pointer(void); #include <asm/atomic_64.h> #endif +#ifndef __ASSEMBLY__ + +/** + * atomic64_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic64_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline long long atomic64_xchg(atomic64_t *v, long long n) +{ + return xchg64(&v->counter, n); +} + +/** + * atomic64_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic64_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) +{ + return cmpxchg64(&v->counter, o, n); +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long c, old, dec; + + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_TILE_ATOMIC_H */ diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index c03349e0ca9..1b109fad9ff 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -17,44 +17,11 @@ #ifndef _ASM_TILE_ATOMIC_32_H #define _ASM_TILE_ATOMIC_32_H +#include <asm/barrier.h> #include <arch/chip.h> #ifndef __ASSEMBLY__ -/* Tile-specific routines to support <linux/atomic.h>. */ -int _atomic_xchg(atomic_t *v, int n); -int _atomic_xchg_add(atomic_t *v, int i); -int _atomic_xchg_add_unless(atomic_t *v, int a, int u); -int _atomic_cmpxchg(atomic_t *v, int o, int n); - -/** - * atomic_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline int atomic_xchg(atomic_t *v, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg(v, n); -} - -/** - * atomic_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline int atomic_cmpxchg(atomic_t *v, int o, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_cmpxchg(v, o, n); -} - /** * atomic_add - add integer to atomic variable * @i: integer value to add @@ -64,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n) */ static inline void atomic_add(int i, atomic_t *v) { - _atomic_xchg_add(v, i); + _atomic_xchg_add(&v->counter, i); } /** @@ -77,7 +44,7 @@ static inline void atomic_add(int i, atomic_t *v) static inline int atomic_add_return(int i, atomic_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add(v, i) + i; + return _atomic_xchg_add(&v->counter, i) + i; } /** @@ -92,7 +59,7 @@ static inline int atomic_add_return(int i, atomic_t *v) static inline int __atomic_add_unless(atomic_t *v, int a, int u) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add_unless(v, a, u); + return _atomic_xchg_add_unless(&v->counter, a, u); } /** @@ -107,64 +74,31 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) */ static inline void atomic_set(atomic_t *v, int n) { - _atomic_xchg(v, n); + _atomic_xchg(&v->counter, n); } /* A 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + long long counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } -u64 _atomic64_xchg(atomic64_t *v, u64 n); -u64 _atomic64_xchg_add(atomic64_t *v, u64 i); -u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u); -u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n); - /** * atomic64_read - read atomic variable * @v: pointer of type atomic64_t * * Atomically reads the value of @v. */ -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { /* * Requires an atomic op to read both 32-bit parts consistently. * Casting away const is safe since the atomic support routines * do not write to memory if the value has not been modified. */ - return _atomic64_xchg_add((atomic64_t *)v, 0); -} - -/** - * atomic64_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic64_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline u64 atomic64_xchg(atomic64_t *v, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg(v, n); -} - -/** - * atomic64_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic64_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_cmpxchg(v, o, n); + return _atomic64_xchg_add((long long *)&v->counter, 0); } /** @@ -174,9 +108,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) * * Atomically adds @i to @v. */ -static inline void atomic64_add(u64 i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { - _atomic64_xchg_add(v, i); + _atomic64_xchg_add(&v->counter, i); } /** @@ -186,10 +120,10 @@ static inline void atomic64_add(u64 i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add(v, i) + i; + return _atomic64_xchg_add(&v->counter, i) + i; } /** @@ -199,12 +133,13 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. - * Returns the old value of @v. + * Returns non-zero if @v was not @u, and zero otherwise. */ -static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +static inline long long atomic64_add_unless(atomic64_t *v, long long a, + long long u) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add_unless(v, a, u) != u; + return _atomic64_xchg_add_unless(&v->counter, a, u) != u; } /** @@ -217,9 +152,9 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) * atomic64_set() can't be just a raw store, since it would be lost if it * fell between the load and store of one of the other atomic ops. */ -static inline void atomic64_set(atomic64_t *v, u64 n) +static inline void atomic64_set(atomic64_t *v, long long n) { - _atomic64_xchg(v, n); + _atomic64_xchg(&v->counter, n); } #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) @@ -234,16 +169,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) -/* - * We need to barrier before modifying the word, since the _atomic_xxx() - * routines just tns the lock and then read/modify/write of the word. - * But after the word is updated, the routine issues an "mf" before returning, - * and since it's a function call, we don't even need a compiler barrier. - */ -#define smp_mb__before_atomic_dec() smp_mb() -#define smp_mb__before_atomic_inc() smp_mb() -#define smp_mb__after_atomic_dec() do { } while (0) -#define smp_mb__after_atomic_inc() do { } while (0) #endif /* !__ASSEMBLY__ */ @@ -251,21 +176,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) * Internal definitions only beyond this point. */ -#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \ - (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP)) - -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* Number of entries in atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L1_SHIFT 6 -#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT) - -/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2) -#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT) - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * Number of atomic locks in atomic_locks[]. Must be a power of two. * There is no reason for more than PAGE_SIZE / 8 entries, since that @@ -280,8 +190,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) extern int atomic_locks[]; #endif -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * All the code that may fault while holding an atomic lock must * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code @@ -302,7 +210,14 @@ void __init_atomic_per_cpu(void); void __atomic_fault_unlock(int *lock_ptr); #endif +/* Return a pointer to the lock for the given address. */ +int *__atomic_hashed_lock(volatile void *v); + /* Private helper routines in lib/atomic_asm_32.S */ +struct __get_user { + unsigned long val; + int err; +}; extern struct __get_user __atomic_cmpxchg(volatile int *p, int *lock, int o, int n); extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); @@ -312,11 +227,16 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p, extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); -extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n); -extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add_unless(volatile u64 *p, - int *lock, u64 o, u64 n); +extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, + long long o, long long n); +extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); +extern long long __atomic64_xchg_add(volatile long long *p, int *lock, + long long n); +extern long long __atomic64_xchg_add_unless(volatile long long *p, + int *lock, long long o, long long n); + +/* Return failure from the atomic wrappers. */ +struct __get_user __atomic_bad_address(int __user *addr); #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 27fe667fddf..7b11c5fadd4 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -19,6 +19,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <arch/spr_def.h> /* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */ @@ -31,25 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline int atomic_cmpxchg(atomic_t *v, int o, int n) -{ - int val; - __insn_mtspr(SPR_CMPEXCH_VALUE, o); - smp_mb(); /* barrier for proper semantics */ - val = __insn_cmpexch4((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - -static inline int atomic_xchg(atomic_t *v, int n) -{ - int val; - smp_mb(); /* barrier for proper semantics */ - val = __insn_exch4((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - static inline void atomic_add(int i, atomic_t *v) { __insn_fetchadd4((void *)&v->counter, i); @@ -71,7 +53,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) if (oldval == u) break; guess = oldval; - oldval = atomic_cmpxchg(v, guess, guess + a); + oldval = cmpxchg(&v->counter, guess, guess + a); } while (guess != oldval); return oldval; } @@ -83,25 +65,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define atomic64_read(v) ((v)->counter) #define atomic64_set(v, i) ((v)->counter = (i)) -static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n) -{ - long val; - smp_mb(); /* barrier for proper semantics */ - __insn_mtspr(SPR_CMPEXCH_VALUE, o); - val = __insn_cmpexch((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - -static inline long atomic64_xchg(atomic64_t *v, long n) -{ - long val; - smp_mb(); /* barrier for proper semantics */ - val = __insn_exch((void *)&v->counter, n); - smp_mb(); /* barrier for proper semantics */ - return val; -} - static inline void atomic64_add(long i, atomic64_t *v) { __insn_fetchadd((void *)&v->counter, i); @@ -123,7 +86,7 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) if (oldval == u) break; guess = oldval; - oldval = atomic64_cmpxchg(v, guess, guess + a); + oldval = cmpxchg(&v->counter, guess, guess + a); } while (guess != oldval); return oldval != u; } @@ -142,12 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -/* Atomic dec and inc don't implement barrier, so provide them if needed. */ -#define smp_mb__before_atomic_dec() smp_mb() -#define smp_mb__after_atomic_dec() smp_mb() -#define smp_mb__before_atomic_inc() smp_mb() -#define smp_mb__after_atomic_inc() smp_mb() - /* Define this to indicate that cmpxchg is an efficient operation. */ #define __HAVE_ARCH_CMPXCHG diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h new file mode 100644 index 00000000000..96a42ae79f4 --- /dev/null +++ b/arch/tile/include/asm/barrier.h @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BARRIER_H +#define _ASM_TILE_BARRIER_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <arch/chip.h> +#include <arch/spr_def.h> +#include <asm/timex.h> + +#define __sync() __insn_mf() + +#include <hv/syscall_public.h> +/* + * Issue an uncacheable load to each memory controller, then + * wait until those loads have completed. + */ +static inline void __mb_incoherent(void) +{ + long clobber_r10; + asm volatile("swint2" + : "=R10" (clobber_r10) + : "R10" (HV_SYS_fence_incoherent) + : "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "r8", "r9", + "r11", "r12", "r13", "r14", + "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29"); +} + +/* Fence to guarantee visibility of stores to incoherent memory. */ +static inline void +mb_incoherent(void) +{ + __insn_mf(); + + { +#if CHIP_HAS_TILE_WRITE_PENDING() + const unsigned long WRITE_TIMEOUT_CYCLES = 400; + unsigned long start = get_cycles_low(); + do { + if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0) + return; + } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES); +#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ + (void) __mb_incoherent(); + } +} + +#define fast_wmb() __sync() +#define fast_rmb() __sync() +#define fast_mb() __sync() +#define fast_iob() mb_incoherent() + +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define mb() fast_mb() +#define iob() fast_iob() + +#ifndef __tilegx__ /* 32 bit */ +/* + * We need to barrier before modifying the word, since the _atomic_xxx() + * routines just tns the lock and then read/modify/write of the word. + * But after the word is updated, the routine issues an "mf" before returning, + * and since it's a function call, we don't even need a compiler barrier. + */ +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() do { } while (0) +#else /* 64 bit */ +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() smp_mb() +#endif + +#include <asm-generic/barrier.h> + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_TILE_BARRIER_H */ diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 16f1fa51fea..20caa346ac0 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -17,6 +17,7 @@ #define _ASM_TILE_BITOPS_H #include <linux/types.h> +#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly @@ -29,17 +30,6 @@ #endif /** - * __ffs - find first set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. - */ -static inline unsigned long __ffs(unsigned long word) -{ - return __builtin_ctzl(word); -} - -/** * ffz - find first zero bit in word * @word: The word to search * @@ -50,31 +40,9 @@ static inline unsigned long ffz(unsigned long word) return __builtin_ctzl(~word); } -/** - * __fls - find last set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. - */ -static inline unsigned long __fls(unsigned long word) -{ - return (sizeof(word) * 8) - 1 - __builtin_clzl(word); -} - -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static inline int ffs(int x) +static inline int fls64(__u64 w) { - return __builtin_ffs(x); + return (sizeof(__u64) * 8) - __builtin_clzll(w); } /** @@ -90,12 +58,7 @@ static inline int ffs(int x) */ static inline int fls(int x) { - return (sizeof(int) * 8) - __builtin_clz(x); -} - -static inline int fls64(__u64 w) -{ - return (sizeof(__u64) * 8) - __builtin_clzll(w); + return fls64((unsigned int) x); } static inline unsigned int __arch_hweight32(unsigned int w) @@ -118,6 +81,9 @@ static inline unsigned long __arch_hweight64(__u64 w) return __builtin_popcountll(w); } +#include <asm-generic/bitops/builtin-__ffs.h> +#include <asm-generic/bitops/builtin-__fls.h> +#include <asm-generic/bitops/builtin-ffs.h> #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/lock.h> #include <asm-generic/bitops/find.h> diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index 571b118bfd9..bbf7b666f21 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -16,8 +16,7 @@ #define _ASM_TILE_BITOPS_32_H #include <linux/compiler.h> -#include <linux/atomic.h> -#include <asm/system.h> +#include <asm/barrier.h> /* Tile-specific routines to support <asm/bitops.h>. */ unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); @@ -50,8 +49,8 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) * restricted to acting on a single-word quantity. * * clear_bit() may not contain a memory barrier, so if it is used for - * locking purposes, you should call smp_mb__before_clear_bit() and/or - * smp_mb__after_clear_bit() to ensure changes are visible on other cpus. + * locking purposes, you should call smp_mb__before_atomic() and/or + * smp_mb__after_atomic() to ensure changes are visible on other cpus. */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { @@ -122,10 +121,6 @@ static inline int test_and_change_bit(unsigned nr, return (_atomic_xor(addr, mask) & mask) != 0; } -/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */ -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() do {} while (0) - #include <asm-generic/bitops/ext2-atomic.h> #endif /* _ASM_TILE_BITOPS_32_H */ diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index e9c8e381ee0..bb1a29221fc 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -16,8 +16,7 @@ #define _ASM_TILE_BITOPS_64_H #include <linux/compiler.h> -#include <linux/atomic.h> -#include <asm/system.h> +#include <asm/cmpxchg.h> /* See <asm/bitops.h> for API comments. */ @@ -33,20 +32,15 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) __insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask); } -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() - - static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - unsigned long old, mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; - old = *addr; + oldval = *addr; do { guess = oldval; - oldval = atomic64_cmpxchg((atomic64_t *)addr, - guess, guess ^ mask); + oldval = cmpxchg(addr, guess, guess ^ mask); } while (guess != oldval); } @@ -86,13 +80,12 @@ static inline int test_and_change_bit(unsigned nr, volatile unsigned long *addr) { unsigned long mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval = *addr; + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; oldval = *addr; do { guess = oldval; - oldval = atomic64_cmpxchg((atomic64_t *)addr, - guess, guess ^ mask); + oldval = cmpxchg(addr, guess, guess ^ mask); } while (guess != oldval); return (oldval & mask) != 0; } diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h deleted file mode 100644 index 9558416d578..00000000000 --- a/arch/tile/include/asm/byteorder.h +++ /dev/null @@ -1 +0,0 @@ -#include <linux/byteorder/little_endian.h> diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 392e5333dd8..6160761d5f6 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -27,11 +27,17 @@ #define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES) /* - * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN. + * TILEPro I/O is not always coherent (networking typically uses coherent + * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the + * L2 cacheline size helps ensure that kernel heap allocations are aligned. + * TILE-Gx I/O is always coherent when used on hash-for-home pages. + * + * However, it's possible at runtime to request not to use hash-for-home + * for the kernel heap, in which case the kernel will use flush-and-inval + * to manage coherence. As a result, we use L2_CACHE_BYTES for the + * DMA minimum alignment to avoid false sharing in the kernel heap. */ -#ifndef __tilegx__ #define ARCH_DMA_MINALIGN L2_CACHE_BYTES -#endif /* use the cache line size for the L2, which is where it counts */ #define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT @@ -43,9 +49,16 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) /* - * Attribute for data that is kept read/write coherent until the end of - * initialization, then bumped to read/only incoherent for performance. + * Originally we used small TLB pages for kernel data and grouped some + * things together as "write once", enforcing the property at the end + * of initialization by making those pages read-only and non-coherent. + * This allowed better cache utilization since cache inclusion did not + * need to be maintained. However, to do this requires an extra TLB + * entry, which on balance is more of a performance hit than the + * non-coherence is a performance gain, so we now just make "read + * mostly" and "write once" be synonyms. We keep the attribute + * separate in case we change our minds at a future date. */ -#define __write_once __attribute__((__section__(".w1data"))) +#define __write_once __read_mostly #endif /* _ASM_TILE_CACHE_H */ diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index e925f4bb498..92ee4c8a4f7 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -20,7 +20,6 @@ /* Keep includes the same across arches. */ #include <linux/mm.h> #include <linux/cache.h> -#include <asm/system.h> #include <arch/icache.h> /* Caches are physically-indexed and so don't need special treatment */ @@ -76,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma, #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy((dst), (src), (len)) -/* - * Invalidate a VA range; pads to L2 cacheline boundaries. - * - * Note that on TILE64, __inv_buffer() actually flushes modified - * cache lines in addition to invalidating them, i.e., it's the - * same as __finv_buffer(). - */ -static inline void __inv_buffer(void *buffer, size_t size) -{ - char *next = (char *)((long)buffer & -L2_CACHE_BYTES); - char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); - while (next < finish) { - __insn_inv(next); - next += CHIP_INV_STRIDE(); - } -} - /* Flush a VA range; pads to L2 cacheline boundaries. */ static inline void __flush_buffer(void *buffer, size_t size) { @@ -116,13 +98,6 @@ static inline void __finv_buffer(void *buffer, size_t size) } -/* Invalidate a VA range and wait for it to be complete. */ -static inline void inv_buffer(void *buffer, size_t size) -{ - __inv_buffer(buffer, size); - mb(); -} - /* * Flush a locally-homecached VA range and wait for the evicted * cachelines to hit memory. @@ -143,6 +118,26 @@ static inline void finv_buffer_local(void *buffer, size_t size) mb_incoherent(); } +#ifdef __tilepro__ +/* Invalidate a VA range; pads to L2 cacheline boundaries. */ +static inline void __inv_buffer(void *buffer, size_t size) +{ + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_inv(next); + next += CHIP_INV_STRIDE(); + } +} + +/* Invalidate a VA range and wait for it to be complete. */ +static inline void inv_buffer(void *buffer, size_t size) +{ + __inv_buffer(buffer, size); + mb(); +} +#endif + /* * Flush and invalidate a VA range that is homed remotely, waiting * until the memory controller holds the flushed values. If "hfh" is @@ -152,4 +147,14 @@ static inline void finv_buffer_local(void *buffer, size_t size) */ void finv_buffer_remote(void *buffer, size_t size, int hfh); +/* + * On SMP systems, when the scheduler does migration-cost autodetection, + * it needs a way to flush as much of the CPU's caches as possible: + * + * TODO: fill this in! + */ +static inline void sched_cacheflush(void) +{ +} + #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h index a120766c726..b21a2fdec9f 100644 --- a/arch/tile/include/asm/checksum.h +++ b/arch/tile/include/asm/checksum.h @@ -21,4 +21,22 @@ __wsum do_csum(const unsigned char *buff, int len); #define do_csum do_csum +/* + * Return the sum of all the 16-bit subwords in a long. + * This sums two subwords on a 32-bit machine, and four on 64 bits. + * The implementation does two vector adds to capture any overflow. + */ +static inline unsigned int csum_long(unsigned long x) +{ + unsigned long ret; +#ifdef __tilegx__ + ret = __insn_v2sadu(x, 0); + ret = __insn_v2sadu(ret, 0); +#else + ret = __insn_sadh_u(x, 0); + ret = __insn_sadh_u(ret, 0); +#endif + return ret; +} + #endif /* _ASM_TILE_CHECKSUM_H */ diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 00000000000..0ccda3c425b --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,134 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +#include <asm/barrier.h> + +/* Nonexistent functions intended to cause compile errors. */ +extern void __xchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for cmpxchg"); + +#ifndef __tilegx__ + +/* Note the _atomic_xxx() routines include a final mb(). */ +int _atomic_xchg(int *ptr, int n); +int _atomic_xchg_add(int *v, int i); +int _atomic_xchg_add_unless(int *v, int a, int u); +int _atomic_cmpxchg(int *ptr, int o, int n); +long long _atomic64_xchg(long long *v, long long n); +long long _atomic64_xchg_add(long long *v, long long i); +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u); +long long _atomic64_cmpxchg(long long *v, long long o, long long n); + +#define xchg(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \ + (int)n); \ + }) + +#define xchg64(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \ + (long long)(n)); \ + }) + +#define cmpxchg64(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \ + (long long)o, (long long)n); \ + }) + +#else + +#define xchg(ptr, n) \ + ({ \ + typeof(*(ptr)) __x; \ + smp_mb(); \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(unsigned long) \ + __insn_exch4((ptr), \ + (u32)(unsigned long)(n)); \ + break; \ + case 8: \ + __x = (typeof(__x)) \ + __insn_exch((ptr), (unsigned long)(n)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + break; \ + } \ + smp_mb(); \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \ + smp_mb(); \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(unsigned long) \ + __insn_cmpexch4((ptr), \ + (u32)(unsigned long)(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))__insn_cmpexch((ptr), \ + (long long)(n)); \ + break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + smp_mb(); \ + __x; \ + }) + +#define xchg64 xchg +#define cmpxchg64 cmpxchg + +#endif + +#define tas(ptr) xchg((ptr), 1) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index bf95f55b82b..ffd4493efc7 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; @@ -111,6 +110,68 @@ struct compat_flock64 { typedef u32 compat_sigset_word; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +#define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3) + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[COMPAT_SI_PAD_SIZE]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + #define COMPAT_OFF_T_MAX 0x7fffffff #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL @@ -211,57 +272,26 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka, struct pt_regs *regs); /* Compat syscalls. */ -struct compat_sigaction; struct compat_siginfo; struct compat_sigaltstack; -long compat_sys_execve(const char __user *path, - compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *); -long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, - struct compat_sigaction __user *oact, - size_t sigsetsize); -long compat_sys_rt_sigqueueinfo(int pid, int sig, - struct compat_siginfo __user *uinfo); -long compat_sys_rt_sigreturn(struct pt_regs *); -long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *); +long compat_sys_rt_sigreturn(void); long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high); long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high); long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 dummy, u32 low, u32 high); long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, u32 dummy, u32 low, u32 high); -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len); long compat_sys_sync_file_range2(int fd, unsigned int flags, u32 offset_lo, u32 offset_hi, u32 nbytes_lo, u32 nbytes_hi); long compat_sys_fallocate(int fd, int mode, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi); -long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); - -/* Versions of compat functions that differ from generic Linux. */ -struct compat_msgbuf; -long tile_compat_sys_msgsnd(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, int msgflg); -long tile_compat_sys_msgrcv(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, long msgtyp, int msgflg); -long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data); - -/* Tilera Linux syscalls that don't have "compat" versions. */ -#define compat_sys_flush_cache sys_flush_cache - -/* These are the intvec_64.S trampolines. */ -long _compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); -long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr); +long compat_sys_llseek(unsigned int fd, unsigned int offset_high, + unsigned int offset_low, loff_t __user * result, + unsigned int origin); + +/* Assembly trampoline to avoid clobbering r0. */ long _compat_sys_rt_sigreturn(void); #endif /* _ASM_TILE_COMPAT_H */ diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h new file mode 100644 index 00000000000..6ab8bf146d4 --- /dev/null +++ b/arch/tile/include/asm/device.h @@ -0,0 +1,36 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * Arch specific extensions to struct device + */ + +#ifndef _ASM_TILE_DEVICE_H +#define _ASM_TILE_DEVICE_H + +struct dev_archdata { + /* DMA operations on that device */ + struct dma_map_ops *dma_ops; + + /* Offset of the DMA address from the PA. */ + dma_addr_t dma_offset; + + /* + * Highest DMA address that can be generated by devices that + * have limited DMA capability, i.e. non 64-bit capable. + */ + dma_addr_t max_direct_dma_addr; +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_TILE_DEVICE_H */ diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index eaa06d175b3..1eae359d831 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -20,69 +20,94 @@ #include <linux/cache.h> #include <linux/io.h> -/* - * Note that on x86 and powerpc, there is a "struct dma_mapping_ops" - * that is used for all the DMA operations. For now, we don't have an - * equivalent on tile, because we only have a single way of doing DMA. - * (Tilera bug 7994 to use dma_mapping_ops.) - */ +#ifdef __tilegx__ +#define ARCH_HAS_DMA_GET_REQUIRED_MASK +#endif + +extern struct dma_map_ops *tile_dma_map_ops; +extern struct dma_map_ops *gx_pci_dma_map_ops; +extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; +extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + else + return tile_dma_map_ops; +} + +static inline dma_addr_t get_dma_offset(struct device *dev) +{ + return dev->archdata.dma_offset; +} + +static inline void set_dma_offset(struct device *dev, dma_addr_t off) +{ + dev->archdata.dma_offset = off; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + +static inline void dma_mark_clean(void *addr, size_t size) {} + +#include <asm-generic/dma-mapping-common.h> + +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction); -extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction); -extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction); -extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction); -extern dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction); -extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction); -extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction); -extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction); - - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t, - enum dma_data_direction); -extern void dma_sync_single_for_device(struct device *, dma_addr_t, - size_t, enum dma_data_direction); -extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_sync_single_range_for_device(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_cache_sync(struct device *dev, void *vaddr, size_t, - enum dma_data_direction); +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return 0; + debug_dma_mapping_error(dev, dma_addr); + return get_dma_ops(dev)->mapping_error(dev, dma_addr); } static inline int dma_supported(struct device *dev, u64 mask) { - return 1; + return get_dma_ops(dev)->dma_supported(dev, mask); } static inline int dma_set_mask(struct device *dev, u64 mask) { + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + /* + * For PCI devices with 64-bit DMA addressing capability, promote + * the dma_ops to hybrid, with the consistent memory DMA space limited + * to 32-bit. For 32-bit capable devices, limit the streaming DMA + * address range to max_direct_dma_addr. + */ + if (dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) { + if (mask == DMA_BIT_MASK(64) && + dma_ops == gx_legacy_pci_dma_map_ops) + set_dma_ops(dev, gx_hybrid_pci_dma_map_ops); + else if (mask > dev->archdata.max_direct_dma_addr) + mask = dev->archdata.max_direct_dma_addr; + } + if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; @@ -91,4 +116,43 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + void *cpu_addr; + + cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs); + + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + + return cpu_addr; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + + dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); +} + +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) +#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) + +/* + * dma_alloc_noncoherent() is #defined to return coherent memory, + * so there's no need to do any flushing here. + */ +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + #endif /* _ASM_TILE_DMA_MAPPING_H */ diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index 623a6bb741c..41d9878a968 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -#define EM_TILE64 187 #define EM_TILEPRO 188 #define EM_TILEGX 191 @@ -44,7 +43,11 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #else #define ELF_CLASS ELFCLASS32 #endif +#ifdef __BIG_ENDIAN__ +#define ELF_DATA ELFDATA2MSB +#else #define ELF_DATA ELFDATA2LSB +#endif /* * There seems to be a bug in how compat_binfmt_elf.c works: it @@ -59,6 +62,7 @@ enum { ELF_ARCH = CHIP_ELF_TYPE() }; */ #define elf_check_arch(x) \ ((x)->e_ident[EI_CLASS] == ELF_CLASS && \ + (x)->e_ident[EI_DATA] == ELF_DATA && \ (x)->e_machine == CHIP_ELF_TYPE()) /* The module loader only handles a few relocation types. */ @@ -127,6 +131,15 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ +} while (0) + +struct mm_struct; +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + #ifdef CONFIG_COMPAT #define COMPAT_ELF_PLATFORM "tilegx-m32" @@ -143,6 +156,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define compat_start_thread(regs, ip, usp) do { \ regs->pc = ptr_to_compat_reg((void *)(ip)); \ regs->sp = ptr_to_compat_reg((void *)(usp)); \ + single_step_execve(); \ } while (0) /* @@ -151,12 +165,12 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #undef SET_PERSONALITY #define SET_PERSONALITY(ex) \ do { \ - current->personality = PER_LINUX; \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \ current_thread_info()->status &= ~TS_COMPAT; \ } while (0) #define COMPAT_SET_PERSONALITY(ex) \ do { \ - current->personality = PER_LINUX_32BIT; \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \ current_thread_info()->status |= TS_COMPAT; \ } while (0) @@ -164,4 +178,6 @@ do { \ #endif /* CONFIG_COMPAT */ +#define CORE_DUMP_USE_REGSET + #endif /* _ASM_TILE_ELF_H */ diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index c66f7933bea..ffe2637aeb3 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -25,9 +25,6 @@ #include <asm/kmap_types.h> #endif -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -45,15 +42,23 @@ * * TLB entries of such buffers will not be flushed across * task switches. - * - * We don't bother with a FIX_HOLE since above the fixmaps - * is unmapped memory in any case. */ enum fixed_addresses { +#ifdef __tilegx__ + /* + * TILEPro has unmapped memory above so the hole isn't needed, + * and in any case the hole pushes us over a single 16MB pmd. + */ + FIX_HOLE, +#endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif +#ifdef __tilegx__ /* see homecache.c */ + FIX_HOMECACHE_BEGIN, + FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1, +#endif __end_of_permanent_fixed_addresses, /* @@ -70,48 +75,12 @@ enum fixed_addresses { #endif }; -extern void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE) #define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE) -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h index 461459b06d9..13a9bb81a8a 100644 --- a/arch/tile/include/asm/ftrace.h +++ b/arch/tile/include/asm/ftrace.h @@ -15,6 +15,26 @@ #ifndef _ASM_TILE_FTRACE_H #define _ASM_TILE_FTRACE_H -/* empty */ +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_ADDR ((unsigned long)(__mcount)) +#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void __mcount(void); + +#ifdef CONFIG_DYNAMIC_FTRACE +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* _ASM_TILE_FTRACE_H */ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index d03ec124a59..1a6ef1b69cb 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -28,29 +28,82 @@ #include <linux/futex.h> #include <linux/uaccess.h> #include <linux/errno.h> +#include <asm/atomic.h> -extern struct __get_user futex_set(u32 __user *v, int i); -extern struct __get_user futex_add(u32 __user *v, int n); -extern struct __get_user futex_or(u32 __user *v, int n); -extern struct __get_user futex_andn(u32 __user *v, int n); -extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n); +/* + * Support macros for futex operations. Do not use these macros directly. + * They assume "ret", "val", "oparg", and "uaddr" in the lexical context. + * __futex_cmpxchg() additionally assumes "oldval". + */ + +#ifdef __tilegx__ + +#define __futex_asm(OP) \ + asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %5; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + ".align 8\n" \ + ".quad 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (val), "+m" (*(uaddr)) \ + : "r" (uaddr), "r" (oparg), "i" (-EFAULT)) + +#define __futex_set() __futex_asm(exch4) +#define __futex_add() __futex_asm(fetchadd4) +#define __futex_or() __futex_asm(fetchor4) +#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); }) +#define __futex_cmpxchg() \ + ({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); }) + +#define __futex_xor() \ + ({ \ + u32 oldval, n = oparg; \ + if ((ret = __get_user(oldval, uaddr)) == 0) { \ + do { \ + oparg = oldval ^ n; \ + __futex_cmpxchg(); \ + } while (ret == 0 && oldval != val); \ + } \ + }) + +/* No need to prefetch, since the atomic ops go to the home cache anyway. */ +#define __futex_prolog() -#ifndef __tilegx__ -extern struct __get_user futex_xor(u32 __user *v, int n); #else -static inline struct __get_user futex_xor(u32 __user *uaddr, int n) -{ - struct __get_user asm_ret = __get_user_4(uaddr); - if (!asm_ret.err) { - int oldval, newval; - do { - oldval = asm_ret.val; - newval = oldval ^ n; - asm_ret = futex_cmpxchg(uaddr, oldval, newval); - } while (asm_ret.err == 0 && oldval != asm_ret.val); + +#define __futex_call(FN) \ + { \ + struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \ + val = gu.val; \ + ret = gu.err; \ } - return asm_ret; -} + +#define __futex_set() __futex_call(__atomic_xchg) +#define __futex_add() __futex_call(__atomic_xchg_add) +#define __futex_or() __futex_call(__atomic_or) +#define __futex_andn() __futex_call(__atomic_andn) +#define __futex_xor() __futex_call(__atomic_xor) + +#define __futex_cmpxchg() \ + { \ + struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ + val = gu.val; \ + ret = gu.err; \ + } + +/* + * Find the lock pointer for the atomic calls to use, and issue a + * prefetch to the user address to bring it into cache. Similar to + * __atomic_setup(), but we can't do a read into the L1 since it might + * fault; instead we do a prefetch into the L2. + */ +#define __futex_prolog() \ + int *lock; \ + __insn_prefetch(uaddr); \ + lock = __atomic_hashed_lock((int __force *)uaddr) #endif static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) @@ -59,8 +112,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int ret; - struct __get_user asm_ret; + int uninitialized_var(val), ret; + + __futex_prolog(); + + /* The 32-bit futex code makes this assumption, so validate it here. */ + BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -71,46 +128,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) pagefault_disable(); switch (op) { case FUTEX_OP_SET: - asm_ret = futex_set(uaddr, oparg); + __futex_set(); break; case FUTEX_OP_ADD: - asm_ret = futex_add(uaddr, oparg); + __futex_add(); break; case FUTEX_OP_OR: - asm_ret = futex_or(uaddr, oparg); + __futex_or(); break; case FUTEX_OP_ANDN: - asm_ret = futex_andn(uaddr, oparg); + __futex_andn(); break; case FUTEX_OP_XOR: - asm_ret = futex_xor(uaddr, oparg); + __futex_xor(); break; default: - asm_ret.err = -ENOSYS; + ret = -ENOSYS; + break; } pagefault_enable(); - ret = asm_ret.err; - if (!ret) { switch (cmp) { case FUTEX_OP_CMP_EQ: - ret = (asm_ret.val == cmparg); + ret = (val == cmparg); break; case FUTEX_OP_CMP_NE: - ret = (asm_ret.val != cmparg); + ret = (val != cmparg); break; case FUTEX_OP_CMP_LT: - ret = (asm_ret.val < cmparg); + ret = (val < cmparg); break; case FUTEX_OP_CMP_GE: - ret = (asm_ret.val >= cmparg); + ret = (val >= cmparg); break; case FUTEX_OP_CMP_LE: - ret = (asm_ret.val <= cmparg); + ret = (val <= cmparg); break; case FUTEX_OP_CMP_GT: - ret = (asm_ret.val > cmparg); + ret = (val > cmparg); break; default: ret = -ENOSYS; @@ -120,22 +176,20 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) + u32 oldval, u32 oparg) { - struct __get_user asm_ret; + int ret, val; + + __futex_prolog(); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm_ret = futex_cmpxchg(uaddr, oldval, newval); - *uval = asm_ret.val; - return asm_ret.err; -} + __futex_cmpxchg(); -#ifndef __tilegx__ -/* Return failure from the atomic wrappers. */ -struct __get_user __atomic_bad_address(int __user *addr); -#endif + *uval = val; + return ret; +} #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h index 822390f9a15..54110af2398 100644 --- a/arch/tile/include/asm/hardirq.h +++ b/arch/tile/include/asm/hardirq.h @@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ -#define HARDIRQ_BITS 8 - #endif /* _ASM_TILE_HARDIRQ_H */ diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h index 2ac422848c7..2f572b6b7bc 100644 --- a/arch/tile/include/asm/hardwall.h +++ b/arch/tile/include/asm/hardwall.h @@ -11,45 +11,13 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. * - * Provide methods for the HARDWALL_FILE for accessing the UDN. + * Provide methods for access control of per-cpu resources like + * UDN, IDN, or IPI. */ - #ifndef _ASM_TILE_HARDWALL_H #define _ASM_TILE_HARDWALL_H -#include <linux/ioctl.h> - -#define HARDWALL_IOCTL_BASE 0xa2 - -/* - * The HARDWALL_CREATE() ioctl is a macro with a "size" argument. - * The resulting ioctl value is passed to the kernel in conjunction - * with a pointer to a little-endian bitmask of cpus, which must be - * physically in a rectangular configuration on the chip. - * The "size" is the number of bytes of cpu mask data. - */ -#define _HARDWALL_CREATE 1 -#define HARDWALL_CREATE(size) \ - _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size)) - -#define _HARDWALL_ACTIVATE 2 -#define HARDWALL_ACTIVATE \ - _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE) - -#define _HARDWALL_DEACTIVATE 3 -#define HARDWALL_DEACTIVATE \ - _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) - -#define _HARDWALL_GET_ID 4 -#define HARDWALL_GET_ID \ - _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) - -#ifndef __KERNEL__ - -/* This is the canonical name expected by userspace. */ -#define HARDWALL_FILE "/dev/hardwall" - -#else +#include <uapi/asm/hardwall.h> /* /proc hooks for hardwall. */ struct proc_dir_entry; @@ -59,7 +27,4 @@ int proc_pid_hardwall(struct task_struct *task, char *buffer); #else static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} #endif - -#endif - #endif /* _ASM_TILE_HARDWALL_H */ diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index b2a6c5de79a..fc8429a31c8 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -59,7 +59,7 @@ void *kmap_fix_kpte(struct page *page, int finished); /* This macro is used only in map_new_virtual() to map "page". */ #define kmap_prot page_to_kpgprot(page) -void *__kmap_atomic(struct page *page); +void *kmap_atomic(struct page *page); void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index a8243865d49..7ddd1b8d691 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -33,8 +33,7 @@ struct zone; /* * Is this page immutable (unwritable) and thus able to be cached more - * widely than would otherwise be possible? On tile64 this means we - * mark the PTE to cache locally; on tilepro it means we have "nc" set. + * widely than would otherwise be possible? This means we have "nc" set. */ #define PAGE_HOME_IMMUTABLE -2 @@ -44,16 +43,8 @@ struct zone; */ #define PAGE_HOME_INCOHERENT -3 -#if CHIP_HAS_CBOX_HOME_MAP() /* Home for the page is distributed via hash-for-home. */ #define PAGE_HOME_HASH -4 -#endif - -/* Homing is unknown or unspecified. Not valid for page_home(). */ -#define PAGE_HOME_UNKNOWN -5 - -/* Home on the current cpu. Not valid for page_home(). */ -#define PAGE_HOME_HERE -6 /* Support wrapper to use instead of explicit hv_flush_remote(). */ extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length, @@ -79,10 +70,17 @@ extern void homecache_change_page_home(struct page *, int order, int home); /* * Flush a page out of whatever cache(s) it is in. * This is more than just finv, since it properly handles waiting - * for the data to reach memory on tilepro, but it can be quite - * heavyweight, particularly on hash-for-home memory. + * for the data to reach memory, but it can be quite + * heavyweight, particularly on incoherent or immutable memory. + */ +extern void homecache_finv_page(struct page *); + +/* + * Flush a page out of the specified home cache. + * Note that the specified home need not be the actual home of the page, + * as for example might be the case when coordinating with I/O devices. */ -extern void homecache_flush_cache(struct page *, int order); +extern void homecache_finv_map_page(struct page *, int home); /* * Allocate a page with the given GFP flags, home, and optionally @@ -104,10 +102,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, * routines use homecache_change_page_home() to reset the home * back to the default before returning the page to the allocator. */ +void __homecache_free_pages(struct page *, unsigned int order); void homecache_free_pages(unsigned long addr, unsigned int order); -#define homecache_free_page(page) \ - homecache_free_pages((page), 0) - +#define __homecache_free_page(page) __homecache_free_pages((page), 0) +#define homecache_free_page(page) homecache_free_pages((page), 0) /* diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index d396d180516..3257733003f 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -16,6 +16,7 @@ #define _ASM_TILE_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, @@ -106,4 +107,29 @@ static inline void arch_release_hugepage(struct page *page) { } +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#ifdef CONFIG_HUGETLB_SUPER_PAGES +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) + entry = pte_mksuper(entry); + return entry; +} +#define arch_make_huge_pte arch_make_huge_pte + +/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */ +enum { + HUGE_SHIFT_PGDIR = 0, + HUGE_SHIFT_PMD = 1, + HUGE_SHIFT_PAGE = 2, + HUGE_SHIFT_ENTRIES +}; +extern int huge_shift[HUGE_SHIFT_ENTRIES]; +#endif + #endif /* _ASM_TILE_HUGETLB_H */ diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index d2152deb1f3..9fe434969fa 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -19,7 +19,8 @@ #include <linux/bug.h> #include <asm/page.h> -#define IO_SPACE_LIMIT 0xfffffffful +/* Maximum PCI I/O space address supported. */ +#define IO_SPACE_LIMIT 0xffffffff /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -62,6 +63,92 @@ extern void iounmap(volatile void __iomem *addr); #define mm_ptov(addr) ((void *)phys_to_virt(addr)) #define mm_vtop(addr) ((unsigned long)virt_to_phys(addr)) +#if CHIP_HAS_MMIO() + +/* + * We use inline assembly to guarantee that the compiler does not + * split an access into multiple byte-sized accesses as it might + * sometimes do if a register data structure is marked "packed". + * Obviously on tile we can't tolerate such an access being + * actually unaligned, but we want to avoid the case where the + * compiler conservatively would generate multiple accesses even + * for an aligned read or write. + */ + +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 ret; + asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le16_to_cpu(ret); +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 ret; + /* Sign-extend to conform to u32 ABI sign-extension convention. */ + asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le32_to_cpu(ret); +} + +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 ret; + asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le64_to_cpu(ret); +} + +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = val; +} + +static inline void __raw_writew(u16 val, volatile void __iomem *addr) +{ + asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val))); +} + +static inline void __raw_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val))); +} + +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +{ + asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val))); +} + +/* + * The on-chip I/O hardware on tilegx is configured with VA=PA for the + * kernel's PA range. The low-level APIs and field names use "va" and + * "void *" nomenclature, to be consistent with the general notion + * that the addresses in question are virtualizable, but in the kernel + * context we are actually manipulating PA values. (In other contexts, + * e.g. access from user space, we do in fact use real virtual addresses + * in the va fields.) To allow readers of the code to understand what's + * happening, we direct their attention to this comment by using the + * following two functions that just duplicate __va() and __pa(). + */ +typedef unsigned long tile_io_addr_t; +static inline tile_io_addr_t va_to_tile_io_addr(void *va) +{ + BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t)); + return __pa(va); +} +static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr) +{ + return __va(tile_io_addr); +} + +#else /* CHIP_HAS_MMIO() */ + #ifdef CONFIG_PCI extern u8 _tile_readb(unsigned long addr); @@ -73,10 +160,19 @@ extern void _tile_writew(u16 val, unsigned long addr); extern void _tile_writel(u32 val, unsigned long addr); extern void _tile_writeq(u64 val, unsigned long addr); -#else +#define __raw_readb(addr) _tile_readb((unsigned long)addr) +#define __raw_readw(addr) _tile_readw((unsigned long)addr) +#define __raw_readl(addr) _tile_readl((unsigned long)addr) +#define __raw_readq(addr) _tile_readq((unsigned long)addr) +#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr) +#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr) +#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr) +#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr) + +#else /* CONFIG_PCI */ /* - * The Tile architecture does not support IOMEM unless PCI is enabled. + * The tilepro architecture does not support IOMEM unless PCI is enabled. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. @@ -88,65 +184,58 @@ static inline int iomem_panic(void) return 0; } -static inline u8 _tile_readb(unsigned long addr) +static inline u8 readb(unsigned long addr) { return iomem_panic(); } -static inline u16 _tile_readw(unsigned long addr) +static inline u16 _readw(unsigned long addr) { return iomem_panic(); } -static inline u32 _tile_readl(unsigned long addr) +static inline u32 readl(unsigned long addr) { return iomem_panic(); } -static inline u64 _tile_readq(unsigned long addr) +static inline u64 readq(unsigned long addr) { return iomem_panic(); } -static inline void _tile_writeb(u8 val, unsigned long addr) +static inline void writeb(u8 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writew(u16 val, unsigned long addr) +static inline void writew(u16 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writel(u32 val, unsigned long addr) +static inline void writel(u32 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writeq(u64 val, unsigned long addr) +static inline void writeq(u64 val, unsigned long addr) { iomem_panic(); } -#endif +#endif /* CONFIG_PCI */ -#define readb(addr) _tile_readb((unsigned long)addr) -#define readw(addr) _tile_readw((unsigned long)addr) -#define readl(addr) _tile_readl((unsigned long)addr) -#define readq(addr) _tile_readq((unsigned long)addr) -#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr) -#define writew(val, addr) _tile_writew(val, (unsigned long)addr) -#define writel(val, addr) _tile_writel(val, (unsigned long)addr) -#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr) - -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq +#endif /* CHIP_HAS_MMIO() */ + +#define readb __raw_readb +#define readw __raw_readw +#define readl __raw_readl +#define readq __raw_readq +#define writeb __raw_writeb +#define writew __raw_writew +#define writel __raw_writel +#define writeq __raw_writeq #define readb_relaxed readb #define readw_relaxed readw @@ -162,9 +251,11 @@ static inline void _tile_writeq(u64 val, unsigned long addr) #define iowrite32 writel #define iowrite64 writeq -static inline void memset_io(void *dst, int val, size_t len) +#if CHIP_HAS_MMIO() || defined(CONFIG_PCI) + +static inline void memset_io(volatile void *dst, int val, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)dst & 0x3); val = (val & 0xff) * 0x01010101; for (x = 0; x < len; x += 4) @@ -174,7 +265,7 @@ static inline void memset_io(void *dst, int val, size_t len) static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)src & 0x3); for (x = 0; x < len; x += 4) *(u32 *)(dst + x) = readl(src + x); @@ -183,14 +274,116 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)dst & 0x3); for (x = 0; x < len; x += 4) writel(*(u32 *)(src + x), dst + x); } +#endif + +#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) + +static inline u8 inb(unsigned long addr) +{ + return readb((volatile void __iomem *) addr); +} + +static inline u16 inw(unsigned long addr) +{ + return readw((volatile void __iomem *) addr); +} + +static inline u32 inl(unsigned long addr) +{ + return readl((volatile void __iomem *) addr); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, (volatile void __iomem *) addr); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, (volatile void __iomem *) addr); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, (volatile void __iomem *) addr); +} + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + if (count) { + u8 *buf = buffer; + do { + u8 x = inb(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insw(unsigned long addr, void *buffer, int count) +{ + if (count) { + u16 *buf = buffer; + do { + u16 x = inw(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + if (count) { + u32 *buf = buffer; + do { + u32 x = inl(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u8 *buf = buffer; + do { + outb(*buf++, addr); + } while (--count); + } +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u16 *buf = buffer; + do { + outw(*buf++, addr); + } while (--count); + } +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u32 *buf = buffer; + do { + outl(*buf++, addr); + } while (--count); + } +} + +extern void __iomem *ioport_map(unsigned long port, unsigned int len); +extern void ioport_unmap(void __iomem *addr); + +#else + /* - * The Tile architecture does not support IOPORT, even with PCI. + * The TilePro architecture does not support IOPORT, even with PCI. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. @@ -198,7 +391,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, static inline long ioport_panic(void) { +#ifdef __tilegx__ + panic("PCI IO space support is disabled. Configure the kernel with" + " CONFIG_TILE_PCI_IO to enable it"); +#else panic("inb/outb and friends do not exist on tile"); +#endif return 0; } @@ -243,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr) ioport_panic(); } -#define inb_p(addr) inb(addr) -#define inw_p(addr) inw(addr) -#define inl_p(addr) inl(addr) -#define outb_p(x, addr) outb((x), (addr)) -#define outw_p(x, addr) outw((x), (addr)) -#define outl_p(x, addr) outl((x), (addr)) - static inline void insb(unsigned long addr, void *buffer, int count) { ioport_panic(); @@ -280,6 +471,15 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) ioport_panic(); } +#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */ + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + #define ioread16be(addr) be16_to_cpu(ioread16(addr)) #define ioread32be(addr) be32_to_cpu(ioread32(addr)) #define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr)) diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index f80f8ceabc6..1fe86911838 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -18,10 +18,12 @@ #include <linux/hardirq.h> /* The hypervisor interface provides 32 IRQs. */ -#define NR_IRQS 32 +#define NR_IRQS 32 /* IRQ numbers used for linux IPIs. */ -#define IRQ_RESCHEDULE 1 +#define IRQ_RESCHEDULE 0 +/* Interrupts for dynamic allocation start at 1. Let the core allocate irq0 */ +#define NR_IRQS_LEGACY 1 #define irq_canonicalize(irq) (irq) diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 5db0ce54284..71af5747874 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -18,32 +18,20 @@ #include <arch/interrupts.h> #include <arch/chip.h> -#if !defined(__tilegx__) && defined(__ASSEMBLY__) - /* * The set of interrupts we want to allow when interrupts are nominally * disabled. The remainder are effectively "NMI" interrupts from * the point of view of the generic Linux code. Note that synchronous * interrupts (aka "non-queued") are not blocked by the mask in any case. */ -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT))) -#else -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT))) -#endif - -#else - -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) -#else #define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT))) -#endif + (~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT))) +#if CHIP_HAS_SPLIT_INTR_MASK() +/* The same macro, but for the two 32-bit SPRs separately. */ +#define LINUX_MASKABLE_INTERRUPTS_LO (-1) +#define LINUX_MASKABLE_INTERRUPTS_HI \ + (~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32)))) #endif #ifndef __ASSEMBLY__ @@ -52,7 +40,15 @@ #include <asm/percpu.h> #include <arch/spr_def.h> -/* Set and clear kernel interrupt masks. */ +/* + * Set and clear kernel interrupt masks. + * + * NOTE: __insn_mtspr() is a compiler builtin marked as a memory + * clobber. We rely on it being equivalent to a compiler barrier in + * this code since arch_local_irq_save() and friends must act as + * compiler barriers. This compiler semantic is baked into enough + * places that the compiler will maintain it going forward. + */ #if CHIP_HAS_SPLIT_INTR_MASK() #if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 # error Fix assumptions about which word various interrupts are in @@ -90,6 +86,14 @@ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \ } while (0) +#define interrupt_mask_save_mask() \ + (__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \ + (((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32)) +#define interrupt_mask_restore_mask(mask) do { \ + unsigned long long __m = (mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \ +} while (0) #else #define interrupt_mask_set(n) \ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n))) @@ -101,6 +105,10 @@ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask)) #define interrupt_mask_reset_mask(mask) \ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask)) +#define interrupt_mask_save_mask() \ + __insn_mfspr(SPR_INTERRUPT_MASK_K) +#define interrupt_mask_restore_mask(mask) \ + __insn_mtspr(SPR_INTERRUPT_MASK_K, (mask)) #endif /* @@ -114,7 +122,13 @@ * to know our current state. */ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); -#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR) +#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR) + +#ifdef CONFIG_DEBUG_PREEMPT +/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */ +extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() +#endif /* Disable interrupts. */ #define arch_local_irq_disable() \ @@ -122,11 +136,20 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Disable all interrupts, including NMIs. */ #define arch_local_irq_disable_all() \ - interrupt_mask_set_mask(-1UL) + interrupt_mask_set_mask(-1ULL) + +/* + * Read the set of maskable interrupts. + * We avoid the preemption warning here via __this_cpu_ptr since even + * if irqs are already enabled, it's harmless to read the wrong cpu's + * enabled mask. + */ +#define arch_local_irqs_enabled() \ + (*__this_cpu_ptr(&interrupts_enabled_mask)) /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ - interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask)) + interrupt_mask_reset_mask(arch_local_irqs_enabled()) /* Disable or enable interrupts based on flag argument. */ #define arch_local_irq_restore(disabled) do { \ @@ -153,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Prevent the given interrupt from being enabled next time we enable irqs. */ #define arch_local_irq_mask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt)) + this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt))) /* Prevent the given interrupt from being enabled immediately. */ #define arch_local_irq_mask_now(interrupt) do { \ @@ -163,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Allow the given interrupt to be enabled next time we enable irqs. */ #define arch_local_irq_unmask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt)) + this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt))) /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */ #define arch_local_irq_unmask_now(interrupt) do { \ @@ -179,7 +202,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #ifdef __tilegx__ #if INT_MEM_ERROR != 0 -# error Fix IRQ_DISABLED() macro +# error Fix IRQS_DISABLED() macro #endif /* Return 0 or 1 to indicate whether interrupts are currently disabled. */ @@ -207,9 +230,10 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); mtspr SPR_INTERRUPT_MASK_SET_K, tmp /* Enable interrupts. */ -#define IRQ_ENABLE(tmp0, tmp1) \ +#define IRQ_ENABLE_LOAD(tmp0, tmp1) \ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ - ld tmp0, tmp0; \ + ld tmp0, tmp0 +#define IRQ_ENABLE_APPLY(tmp0, tmp1) \ mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0 #else /* !__tilegx__ */ @@ -237,7 +261,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Disable interrupts. */ #define IRQ_DISABLE(tmp0, tmp1) \ { \ - movei tmp0, -1; \ + movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \ moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \ }; \ { \ @@ -253,17 +277,22 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp /* Enable interrupts. */ -#define IRQ_ENABLE(tmp0, tmp1) \ +#define IRQ_ENABLE_LOAD(tmp0, tmp1) \ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ { \ lw tmp0, tmp0; \ addi tmp1, tmp0, 4 \ }; \ - lw tmp1, tmp1; \ + lw tmp1, tmp1 +#define IRQ_ENABLE_APPLY(tmp0, tmp1) \ mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \ mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1 #endif +#define IRQ_ENABLE(tmp0, tmp1) \ + IRQ_ENABLE_LOAD(tmp0, tmp1); \ + IRQ_ENABLE_APPLY(tmp0, tmp1) + /* * Do the CPU's IRQ-state tracing from assembly code. We call a * C function, but almost everywhere we do, we don't mind clobbering diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h new file mode 100644 index 00000000000..5bbbfa904c2 --- /dev/null +++ b/arch/tile/include/asm/kdebug.h @@ -0,0 +1,28 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KDEBUG_H +#define _ASM_TILE_KDEBUG_H + +#include <linux/notifier.h> + +enum die_val { + DIE_OOPS = 1, + DIE_BREAK, + DIE_SSTEPBP, + DIE_PAGE_FAULT, + DIE_COMPILED_BPT +}; + +#endif /* _ASM_TILE_KDEBUG_H */ diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h index c11a6cc73bb..fc98ccfc98a 100644 --- a/arch/tile/include/asm/kexec.h +++ b/arch/tile/include/asm/kexec.h @@ -19,12 +19,24 @@ #include <asm/page.h> +#ifndef __tilegx__ /* Maximum physical address we can use pages from. */ #define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE /* Maximum address we can reach in physical address mode. */ #define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE /* Maximum address we can use for the control code buffer. */ #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +#else +/* We need to limit the memory below PGDIR_SIZE since + * we only setup page table for [0, PGDIR_SIZE) before final kexec. + */ +/* Maximum physical address we can use pages from. */ +#define KEXEC_SOURCE_MEMORY_LIMIT PGDIR_SIZE +/* Maximum address we can reach in physical address mode. */ +#define KEXEC_DESTINATION_MEMORY_LIMIT PGDIR_SIZE +/* Maximum address we can use for the control code buffer. */ +#define KEXEC_CONTROL_MEMORY_LIMIT PGDIR_SIZE +#endif #define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h new file mode 100644 index 00000000000..280c181cf0d --- /dev/null +++ b/arch/tile/include/asm/kgdb.h @@ -0,0 +1,71 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx KGDB support. + */ + +#ifndef __TILE_KGDB_H__ +#define __TILE_KGDB_H__ + +#include <linux/kdebug.h> +#include <arch/opcode.h> + +#define GDB_SIZEOF_REG sizeof(unsigned long) + +/* + * TILE-Gx gdb is expecting the following register layout: + * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO), + * plus the PC and the faultnum. + * + * Even though kernel not use the 8 special GPRs, they need to be present + * in the registers sent for correct processing in the host-side gdb. + * + */ +#define DBG_MAX_REG_NUM (56+8+2) +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets, + * Longer buffer is needed to list all threads. + */ +#define BUFMAX 2048 + +#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES + +/* + * Require cache flush for set/clear a software breakpoint or write memory. + */ +#define CACHE_FLUSH_IS_SAFE 1 + +/* + * The compiled-in breakpoint instruction can be used to "break" into + * the debugger via magic system request key (sysrq-G). + */ +static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT; + +enum tilegx_regnum { + TILEGX_PC_REGNUM = TREG_LAST_GPR + 9, + TILEGX_FAULTNUM_REGNUM, +}; + +/* + * Generate a breakpoint exception to "break" into the debugger. + */ +static inline void arch_kgdb_breakpoint(void) +{ + asm volatile (".quad %0\n\t" + ::""(compiled_bpt)); +} + +#endif /* __TILE_KGDB_H__ */ diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h index 3d0f2024626..92b28e3e997 100644 --- a/arch/tile/include/asm/kmap_types.h +++ b/arch/tile/include/asm/kmap_types.h @@ -23,35 +23,6 @@ * adds 4MB of required address-space. For now we leave KM_TYPE_NR * set to depth 8. */ -enum km_type { - KM_TYPE_NR = 8 -}; - -/* - * We provide dummy definitions of all the stray values that used to be - * required for kmap_atomic() and no longer are. - */ -enum { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_SYNC_ICACHE, - KM_SYNC_DCACHE, - KM_UML_USERCOPY, - KM_IRQ_PTE, - KM_NMI, - KM_NMI_PTE, - KM_KDB -}; +#define KM_TYPE_NR 8 #endif /* _ASM_TILE_KMAP_TYPES_H */ diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h new file mode 100644 index 00000000000..d8f9a83943b --- /dev/null +++ b/arch/tile/include/asm/kprobes.h @@ -0,0 +1,79 @@ +/* + * arch/tile/include/asm/kprobes.h + * + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KPROBES_H +#define _ASM_TILE_KPROBES_H + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#include <arch/opcode.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 2 + +#define kretprobe_blacklist_size 0 + +typedef tile_bundle_bits kprobe_opcode_t; + +#define flush_insn_slot(p) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + +struct kprobe; + +/* Architecture specific copy of original instruction. */ +struct arch_specific_insn { + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long saved_pc; +}; + +#define MAX_JPROBES_STACK_SIZE 128 +#define MAX_JPROBES_STACK_ADDR \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \ + - sizeof(struct pt_regs)) + +#define MIN_JPROBES_STACK_SIZE(ADDR) \ + ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \ + ? MAX_JPROBES_STACK_ADDR - (ADDR) \ + : MAX_JPROBES_STACK_SIZE) + +/* per-cpu kprobe control block. */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_pc; + unsigned long jprobe_saved_sp; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_JPROBES_STACK_SIZE]; +}; + +extern tile_bundle_bits breakpoint2_insn; +extern tile_bundle_bits breakpoint_insn; + +void arch_remove_kprobe(struct kprobe *); + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#endif /* _ASM_TILE_KPROBES_H */ diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h deleted file mode 100644 index 359949be28c..00000000000 --- a/arch/tile/include/asm/memprof.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * The hypervisor's memory controller profiling infrastructure allows - * the programmer to find out what fraction of the available memory - * bandwidth is being consumed at each memory controller. The - * profiler provides start, stop, and clear operations to allows - * profiling over a specific time window, as well as an interface for - * reading the most recent profile values. - * - * This header declares IOCTL codes necessary to control memprof. - */ -#ifndef _ASM_TILE_MEMPROF_H -#define _ASM_TILE_MEMPROF_H - -#include <linux/ioctl.h> - -#define MEMPROF_IOCTL_TYPE 0xB4 -#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0) -#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1) -#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2) - -#endif /* _ASM_TILE_MEMPROF_H */ diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h index 92f94c77b6e..0cab1182bde 100644 --- a/arch/tile/include/asm/mmu.h +++ b/arch/tile/include/asm/mmu.h @@ -21,7 +21,8 @@ struct mm_context { * Written under the mmap_sem semaphore; read without the * semaphore but atomically, but it is conservatively set. */ - unsigned int priority_cached; + unsigned long priority_cached; + unsigned long vdso_base; }; typedef struct mm_context mm_context_t; diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index 15fb2464112..4734215e2ad 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -30,18 +30,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } -/* Note that arch/tile/kernel/head.S also calls hv_install_context() */ +/* + * Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S + * also call hv_install_context(). + */ static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot) { /* FIXME: DIRECTIO should not always be set. FIXME. */ - int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO); + int rc = hv_install_context(__pa(pgdir), prot, asid, + HV_CTX_DIRECTIO | CTX_PAGE_FLAG); if (rc < 0) panic("hv_install_context failed: %d", rc); } static inline void install_page_table(pgd_t *pgdir, int asid) { - pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir); + pte_t *ptep = virt_to_kpte((unsigned long)pgdir); __install_page_table(pgdir, asid, *ptep); } diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h index 9d3dbce8f95..804f1098b6c 100644 --- a/arch/tile/include/asm/mmzone.h +++ b/arch/tile/include/asm/mmzone.h @@ -42,7 +42,7 @@ static inline int pfn_to_nid(unsigned long pfn) #define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) -static inline int pfn_valid(int pfn) +static inline int pfn_valid(unsigned long pfn) { int nid = pfn_to_nid(pfn); diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h new file mode 100644 index 00000000000..44ed07ccd3d --- /dev/null +++ b/arch/tile/include/asm/module.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MODULE_H +#define _ASM_TILE_MODULE_H + +#include <arch/chip.h> + +#include <asm-generic/module.h> + +/* We can't use modules built with different page sizes. */ +#if defined(CONFIG_PAGE_SIZE_16KB) +# define MODULE_PGSZ " 16KB" +#elif defined(CONFIG_PAGE_SIZE_64KB) +# define MODULE_PGSZ " 64KB" +#else +# define MODULE_PGSZ "" +#endif + +/* We don't really support no-SMP so tag if someone tries. */ +#ifdef CONFIG_SMP +#define MODULE_NOSMP "" +#else +#define MODULE_NOSMP " nosmp" +#endif + +#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP + +#endif /* _ASM_TILE_MODULE_H */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index db93518fac0..67276800861 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -20,8 +20,17 @@ #include <arch/chip.h> /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */ -#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL -#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE +#if defined(CONFIG_PAGE_SIZE_16KB) +#define PAGE_SHIFT 14 +#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PAGE_SHIFT 16 +#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K +#else +#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL +#define CTX_PAGE_FLAG 0 +#endif +#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) @@ -30,6 +39,12 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) /* + * We do define AT_SYSINFO_EHDR to support vDSO, + * but don't use the gate mechanism. + */ +#define __HAVE_ARCH_GATE_AREA 1 + +/* * If the Kconfig doesn't specify, set a maximum zone order that * is enough so that we can create huge pages from small pages given * the respective sizes of the two page types. See <linux/mmzone.h>. @@ -78,8 +93,7 @@ typedef HV_PTE pgprot_t; /* * User L2 page tables are managed as one L2 page table per page, * because we use the page allocator for them. This keeps the allocation - * simple and makes it potentially useful to implement HIGHPTE at some point. - * However, it's also inefficient, since L2 page tables are much smaller + * simple, but it's also inefficient, since L2 page tables are much smaller * than pages (currently 2KB vs 64KB). So we should revisit this. */ typedef struct page *pgtable_t; @@ -128,14 +142,18 @@ static inline __attribute_const__ int get_order(unsigned long size) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 6 #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif +/* Allow overriding how much VA or PA the kernel will use. */ +#define MAX_PA_WIDTH CHIP_PA_WIDTH() +#define MAX_VA_WIDTH CHIP_VA_WIDTH() + /* Each memory controller has PAs distinct in their high bits. */ -#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS()) +#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS()) #define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS()) #define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT) #define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)) @@ -146,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size) * We reserve the lower half of memory for user-space programs, and the * upper half for system code. We re-map all of physical memory in the * upper half, which takes a quarter of our VA space. Then we have - * the vmalloc regions. The supervisor code lives at 0xfffffff700000000, + * the vmalloc regions. The supervisor code lives at the highest address, * with the hypervisor above that. * * Loadable kernel modules are placed immediately after the static @@ -158,27 +176,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * Similarly, for now we don't play any struct page mapping games. */ -#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH() +#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH # error Too much PA to map with the VA available! #endif -#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1)) - -#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */ -#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */ -#define PAGE_OFFSET MEM_HIGH_START -#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */ -#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */ -#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */ -#define MEM_SV_INTRPT MEM_SV_START -#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */ -#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) -#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */ - -/* Highest DTLB address we will use */ -#define KERNEL_HIGH_VADDR MEM_SV_START -/* Since we don't currently provide any fixmaps, we use an impossible VA. */ -#define FIXADDR_TOP MEM_HV_START +#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) +#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ +#define _VMALLOC_START FIXADDR_TOP +#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ +#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ +#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) #else /* !__tilegx__ */ @@ -200,25 +209,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * values, and after that, we show "typical" values, since the actual * addresses depend on kernel #defines. * - * MEM_HV_INTRPT 0xfe000000 - * MEM_SV_INTRPT (kernel code) 0xfd000000 + * MEM_HV_START 0xfe000000 + * MEM_SV_START (kernel code) 0xfd000000 * MEM_USER_INTRPT (user vector) 0xfc000000 - * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR) - * PKMAP_BASE 0xf7000000 (via LAST_PKMAP) - * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS) - * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE) + * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR) + * PKMAP_BASE 0xf9000000 (via LAST_PKMAP) + * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE) * mapped LOWMEM 0xc0000000 */ #define MEM_USER_INTRPT _AC(0xfc000000, UL) -#if CONFIG_KERNEL_PL == 1 -#define MEM_SV_INTRPT _AC(0xfd000000, UL) -#define MEM_HV_INTRPT _AC(0xfe000000, UL) -#else -#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) -#define MEM_SV_INTRPT _AC(0xfe000000, UL) -#define MEM_HV_INTRPT _AC(0xff000000, UL) -#endif +#define MEM_SV_START _AC(0xfd000000, UL) +#define MEM_HV_START _AC(0xfe000000, UL) #define INTRPT_SIZE 0x4000 @@ -239,7 +241,7 @@ static inline __attribute_const__ int get_order(unsigned long size) #endif /* __tilegx__ */ -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD) #ifdef CONFIG_HIGHMEM @@ -325,6 +327,7 @@ static inline int pfn_valid(unsigned long pfn) struct mm_struct; extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); +extern pte_t *virt_to_kpte(unsigned long kaddr); #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index 5d5a635530b..dfedd7ac729 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,9 +15,12 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H +#include <linux/dma-mapping.h> #include <linux/pci.h> #include <asm-generic/pci_iomap.h> +#ifndef __tilegx__ + /* * Structure of a PCI controller (host bridge) */ @@ -25,7 +28,6 @@ struct pci_controller { int index; /* PCI domain number */ struct pci_bus *root_bus; - int first_busno; int last_busno; int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ @@ -41,20 +43,161 @@ struct pci_controller { }; /* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +extern int tile_plx_gen1; + +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} + +#define TILE_NUM_PCIE 2 + +/* * The hypervisor maps the entirety of CPA-space as bus addresses, so * bus addresses are physical addresses. The networking and block * device layers use this boolean for bounce buffer decisions. */ #define PCI_DMA_BUS_IS_PHYS 1 -int __devinit tile_pci_init(void); -int __devinit pcibios_init(void); +/* generic pci stuff */ +#include <asm-generic/pci.h> -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} +#else -void __devinit pcibios_fixup_bus(struct pci_bus *bus); +#include <asm/page.h> +#include <gxio/trio.h> -#define TILE_NUM_PCIE 2 +/** + * We reserve the hugepage-size address range at the top of the 64-bit address + * space to serve as the PCI window, emulating the BAR0 space of an endpoint + * device. This window is used by the chip-to-chip applications running on + * the RC node. The reason for carving out this window is that Mem-Maps that + * back up this window will not overlap with those that map the real physical + * memory. + */ +#define PCIE_HOST_BAR0_SIZE HPAGE_SIZE +#define PCIE_HOST_BAR0_START HPAGE_MASK + +/** + * The first PAGE_SIZE of the above "BAR" window is mapped to the + * gxpci_host_regs structure. + */ +#define PCIE_HOST_REGS_SIZE PAGE_SIZE + +/* + * This is the PCI address where the Mem-Map interrupt regions start. + * We use the 2nd to the last huge page of the 64-bit address space. + * The last huge page is used for the rootcomplex "bar", for C2C purpose. + */ +#define MEM_MAP_INTR_REGIONS_BASE (HPAGE_MASK - HPAGE_SIZE) + +/* + * Each Mem-Map interrupt region occupies 4KB. + */ +#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT) + +/* + * Allocate the PCI BAR window right below 4GB. + */ +#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32) + +/* + * Allocate 1GB for the PCI BAR window. + */ +#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30) + +/* + * This is the highest bus address targeting the host memory that + * can be generated by legacy PCI devices with 32-bit or less + * DMA capability, dictated by the BAR window size and location. + */ +#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \ + (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1) + +/* + * We shift the PCI bus range for all the physical memory up by the whole PA + * range. The corresponding CPA of an incoming PCI request will be the PCI + * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies + * that the 64-bit capable devices will be given DMA addresses as + * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit + * devices, we create a separate map region that handles the low + * 4GB. + * + * This design lets us avoid the "PCI hole" problem where the host bridge + * won't pass DMA traffic with target addresses that happen to fall within the + * BAR space. This enables us to use all the physical memory for DMA, instead + * of wasting the same amount of physical memory as the BAR window size. + */ +#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH()) + +/* + * Start of the PCI memory resource, which starts at the end of the + * maximum system physical RAM address. + */ +#define TILE_PCI_MEM_START (1ULL << CHIP_PA_WIDTH()) + +/* + * Structure of a PCI controller (host bridge) on Gx. + */ +struct pci_controller { + + /* Pointer back to the TRIO that this PCIe port is connected to. */ + gxio_trio_context_t *trio; + int mac; /* PCIe mac index on the TRIO shim */ + int trio_index; /* Index of TRIO shim that contains the MAC. */ + + int pio_mem_index; /* PIO region index for memory access */ + +#ifdef CONFIG_TILE_PCI_IO + int pio_io_index; /* PIO region index for I/O space access */ +#endif + + /* + * Mem-Map regions for all the memory controllers so that Linux can + * map all of its physical memory space to the PCI bus. + */ + int mem_maps[MAX_NUMNODES]; + + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + /* PCI I/O space resource for this controller. */ + struct resource io_space; + char io_space_name[32]; + + /* PCI memory space resource for this controller. */ + struct resource mem_space; + char mem_space_name[32]; + + uint64_t mem_offset; /* cpu->bus memory mapping offset. */ + + int first_busno; + + struct pci_ops *ops; + + /* Table that maps the INTx numbers to Linux irq numbers. */ + int irq_intx_table[4]; +}; + +extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; +extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; +extern int num_trio_shims; + +extern void pci_iounmap(struct pci_dev *dev, void __iomem *); + +/* + * The PCI address space does not equal the physical memory address + * space (we have an IOMMU). The IDE and SCSI device layers use this + * boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS 0 + +#endif /* __tilegx__ */ + +int __init tile_pci_init(void); +int __init pcibios_init(void); + +void pcibios_fixup_bus(struct pci_bus *bus); #define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index) @@ -77,13 +220,8 @@ static inline int pcibios_assign_all_busses(void) } #define PCIBIOS_MIN_MEM 0 -#define PCIBIOS_MIN_IO 0 - -/* - * This flag tells if the platform is TILEmpower that needs - * special configuration for the PLX switch chip. - */ -extern int tile_plx_gen1; +/* Minimum PCI I/O address, starting at the page boundary. */ +#define PCIBIOS_MIN_IO PAGE_SIZE /* Use any cpu for PCI. */ #define cpumask_of_pcibus(bus) cpu_online_mask @@ -91,7 +229,4 @@ extern int tile_plx_gen1; /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include <asm-generic/pci-dma-compat.h> -/* generic pci stuff */ -#include <asm-generic/pci.h> - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h index 63294f5a8ef..4f7ae39fa20 100644 --- a/arch/tile/include/asm/percpu.h +++ b/arch/tile/include/asm/percpu.h @@ -15,9 +15,37 @@ #ifndef _ASM_TILE_PERCPU_H #define _ASM_TILE_PERCPU_H -register unsigned long __my_cpu_offset __asm__("tp"); -#define __my_cpu_offset __my_cpu_offset -#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) +register unsigned long my_cpu_offset_reg asm("tp"); + +#ifdef CONFIG_PREEMPT +/* + * For full preemption, we can't just use the register variable + * directly, since we need barrier() to hazard against it, causing the + * compiler to reload anything computed from a previous "tp" value. + * But we also don't want to use volatile asm, since we'd like the + * compiler to be able to cache the value across multiple percpu reads. + * So we use a fake stack read as a hazard against barrier(). + * The 'U' constraint is like 'm' but disallows postincrement. + */ +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long tp; + register unsigned long *sp asm("sp"); + asm("move %0, tp" : "=r" (tp) : "U" (*sp)); + return tp; +} +#define __my_cpu_offset __my_cpu_offset() +#else +/* + * We don't need to hazard against barrier() since "tp" doesn't ever + * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only + * changes at function call points, at which we are already re-reading + * the value of "tp" due to "my_cpu_offset_reg" being a global variable. + */ +#define __my_cpu_offset my_cpu_offset_reg +#endif + +#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp)) #include <asm-generic/percpu.h> diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 00000000000..59c5b164e5b --- /dev/null +++ b/arch/tile/include/asm/perf_event.h @@ -0,0 +1,22 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PERF_EVENT_H +#define _ASM_TILE_PERF_EVENT_H + +#include <linux/percpu.h> +DECLARE_PER_CPU(u64, perf_irqs); + +unsigned long handle_syscall_link_address(void); +#endif /* _ASM_TILE_PERF_EVENT_H */ diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h index e919c0bdc22..1b902508b66 100644 --- a/arch/tile/include/asm/pgalloc.h +++ b/arch/tile/include/asm/pgalloc.h @@ -19,24 +19,24 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <asm/fixmap.h> +#include <asm/page.h> #include <hv/hypervisor.h> /* Bits for the size of the second-level page table. */ -#define L2_KERNEL_PGTABLE_SHIFT \ - (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE) +#define L2_KERNEL_PGTABLE_SHIFT _HV_LOG2_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) + +/* How big is a kernel L2 page table? */ +#define L2_KERNEL_PGTABLE_SIZE (1UL << L2_KERNEL_PGTABLE_SHIFT) /* We currently allocate user L2 page tables by page (unlike kernel L2s). */ -#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL -#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL +#if L2_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT +#define L2_USER_PGTABLE_SHIFT PAGE_SHIFT #else #define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT #endif /* How many pages do we need, as an "order", for a user L2 page table? */ -#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL) - -/* How big is a kernel L2 page table? */ -#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT) +#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - PAGE_SHIFT) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { @@ -50,14 +50,14 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *ptep) { - set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN, + set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(__pa(ptep)), __pgprot(_PAGE_PRESENT))); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page) { - set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)), + set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(PFN_PHYS(page_to_pfn(page))), __pgprot(_PAGE_PRESENT))); } @@ -68,8 +68,20 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address); -extern void pte_free(struct mm_struct *mm, struct page *pte); +extern pgtable_t pgtable_alloc_one(struct mm_struct *mm, unsigned long address, + int order); +extern void pgtable_free(struct mm_struct *mm, struct page *pte, int order); + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER); +} + +static inline void pte_free(struct mm_struct *mm, struct page *pte) +{ + pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER); +} #define pmd_pgtable(pmd) pmd_page(pmd) @@ -85,8 +97,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) pte_free(mm, virt_to_page(pte)); } -extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, - unsigned long address); +extern void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address, int order); +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address) +{ + __pgtable_free_tlb(tlb, pte, address, L2_USER_PGTABLE_ORDER); +} #define check_pgt_cache() do { } while (0) @@ -104,19 +121,44 @@ void shatter_pmd(pmd_t *pmd); void shatter_huge_page(unsigned long addr); #ifdef __tilegx__ -/* We share a single page allocator for both L1 and L2 page tables. */ -#if HV_L1_SIZE != HV_L2_SIZE -# error Rework assumption that L1 and L2 page tables are same size. -#endif -#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER + #define pud_populate(mm, pud, pmd) \ pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd)) -#define pmd_alloc_one(mm, addr) \ - ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr)))) -#define pmd_free(mm, pmdp) \ - pte_free((mm), virt_to_page(pmdp)) -#define __pmd_free_tlb(tlb, pmdp, address) \ - __pte_free_tlb((tlb), virt_to_page(pmdp), (address)) + +/* Bits for the size of the L1 (intermediate) page table. */ +#define L1_KERNEL_PGTABLE_SHIFT _HV_LOG2_L1_SIZE(HPAGE_SHIFT) + +/* How big is a kernel L2 page table? */ +#define L1_KERNEL_PGTABLE_SIZE (1UL << L1_KERNEL_PGTABLE_SHIFT) + +/* We currently allocate L1 page tables by page. */ +#if L1_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT +#define L1_USER_PGTABLE_SHIFT PAGE_SHIFT +#else +#define L1_USER_PGTABLE_SHIFT L1_KERNEL_PGTABLE_SHIFT #endif +/* How many pages do we need, as an "order", for an L1 page table? */ +#define L1_USER_PGTABLE_ORDER (L1_USER_PGTABLE_SHIFT - PAGE_SHIFT) + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *p = pgtable_alloc_one(mm, address, L1_USER_PGTABLE_ORDER); + return (pmd_t *)page_to_virt(p); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) +{ + pgtable_free(mm, virt_to_page(pmdp), L1_USER_PGTABLE_ORDER); +} + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, + unsigned long address) +{ + __pgtable_free_tlb(tlb, virt_to_page(pmdp), address, + L1_USER_PGTABLE_ORDER); +} + +#endif /* __tilegx__ */ + #endif /* _ASM_TILE_PGALLOC_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 1a20b7ef8ea..33587f16c15 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -27,9 +27,10 @@ #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/pfn.h> #include <asm/processor.h> #include <asm/fixmap.h> -#include <asm/system.h> +#include <asm/page.h> struct mm_struct; struct vm_area_struct; @@ -70,6 +71,7 @@ extern void set_page_homes(void); #define _PAGE_PRESENT HV_PTE_PRESENT #define _PAGE_HUGE_PAGE HV_PTE_PAGE +#define _PAGE_SUPER_PAGE HV_PTE_SUPER #define _PAGE_READABLE HV_PTE_READABLE #define _PAGE_WRITABLE HV_PTE_WRITABLE #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE @@ -86,6 +88,7 @@ extern void set_page_homes(void); #define _PAGE_ALL (\ _PAGE_PRESENT | \ _PAGE_HUGE_PAGE | \ + _PAGE_SUPER_PAGE | \ _PAGE_READABLE | \ _PAGE_WRITABLE | \ _PAGE_EXECUTABLE | \ @@ -163,7 +166,7 @@ extern void set_page_homes(void); (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val } /* Just setting the PFN to zero suffices. */ -#define pte_pgprot(x) hv_pte_set_pfn((x), 0) +#define pte_pgprot(x) hv_pte_set_pa((x), 0) /* * For PTEs and PDEs, we must clear the Present bit first when @@ -188,6 +191,7 @@ static inline void __pte_clear(pte_t *ptep) * Undefined behaviour if not.. */ #define pte_present hv_pte_get_present +#define pte_mknotpresent hv_pte_clear_present #define pte_user hv_pte_get_user #define pte_read hv_pte_get_readable #define pte_dirty hv_pte_get_dirty @@ -195,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep) #define pte_write hv_pte_get_writable #define pte_exec hv_pte_get_executable #define pte_huge hv_pte_get_page +#define pte_super hv_pte_get_super #define pte_rdprotect hv_pte_clear_readable #define pte_exprotect hv_pte_clear_executable #define pte_mkclean hv_pte_clear_dirty @@ -207,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep) #define pte_mkyoung hv_pte_set_accessed #define pte_mkwrite hv_pte_set_writable #define pte_mkhuge hv_pte_set_page +#define pte_mksuper hv_pte_set_super #define pte_special(pte) 0 #define pte_mkspecial(pte) (pte) @@ -262,7 +268,7 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return hv_pte_get_pfn(pte); + return PFN_DOWN(hv_pte_get_pa(pte)); } /* Set or get the remote cache cpu in a pgprot with remote caching. */ @@ -271,7 +277,7 @@ extern int get_remote_cache_cpu(pgprot_t prot); static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) { - return hv_pte_set_pfn(prot, pfn); + return hv_pte_set_pa(prot, PFN_PHYS(pfn)); } /* Support for priority mappings. */ @@ -313,7 +319,7 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return pfn_pte(hv_pte_get_pfn(pte), newprot); + return pfn_pte(pte_pfn(pte), newprot); } /* @@ -336,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -#if defined(CONFIG_HIGHPTE) -extern pte_t *pte_offset_map(pmd_t *, unsigned long address); -#define pte_unmap(pte) kunmap_atomic(pte) -#else #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) #define pte_unmap(pte) do { } while (0) -#endif /* Clear a non-executable kernel PTE and flush it from the TLB. */ #define kpte_clear_flush(ptep, vaddr) \ @@ -361,9 +362,6 @@ do { \ #define kern_addr_valid(addr) (1) #endif /* CONFIG_FLATMEM */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - extern void vmalloc_sync_all(void); #endif /* !__ASSEMBLY__ */ @@ -411,6 +409,46 @@ static inline unsigned long pmd_index(unsigned long address) return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return ptep_test_and_clear_young(vma, address, pmdp_ptep(pmdp)); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + ptep_set_wrprotect(mm, address, pmdp_ptep(pmdp)); +} + + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp) +{ + return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp))); +} + +static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_pte(pmdp_ptep(pmdp), pmd_pte(pmdval)); +} + +#define set_pmd_at(mm, addr, pmdp, pmdval) __set_pmd(pmdp, pmdval) + +/* Create a pmd from a PTFN. */ +static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) +{ + return pte_pmd(hv_pte_set_ptfn(prot, ptfn)); +} + +/* Return the page-table frame number (ptfn) that a pmd_t points at. */ +#define pmd_ptfn(pmd) hv_pte_get_ptfn(pmd_pte(pmd)) + /* * A given kernel pmd_t maps to a specific virtual address (either a * kernel huge page or a kernel pte_t table). Since kernel pte_t @@ -431,7 +469,48 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * OK for pte_lockptr(), since we just end up with potentially one * lock being used for several pte_t arrays. */ -#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd))) +#define pmd_page(pmd) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pmd_ptfn(pmd)))) + +static inline void pmd_clear(pmd_t *pmdp) +{ + __pte_clear(pmdp_ptep(pmdp)); +} + +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define __HAVE_ARCH_PMD_WRITE + +#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + return pfn_pmd(pmd_pfn(pmd), newprot); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define has_transparent_hugepage() 1 +#define pmd_trans_huge pmd_huge_page + +static inline pmd_t pmd_mksplitting(pmd_t pmd) +{ + return pte_pmd(hv_pte_set_client2(pmd_pte(pmd))); +} + +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return hv_pte_get_client2(pmd_pte(pmd)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] @@ -449,17 +528,13 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); } -static inline int pmd_huge_page(pmd_t pmd) -{ - return pmd_val(pmd) & _PAGE_HUGE_PAGE; -} - #include <asm-generic/pgtable.h> /* Support /proc/NN/pgtable API. */ struct seq_file; int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, - unsigned long vaddr, pte_t *ptep, void **datap); + unsigned long vaddr, unsigned long pagesize, + pte_t *ptep, void **datap); #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 9f98529761f..d26a4227903 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -20,11 +20,12 @@ * The level-1 index is defined by the huge page size. A PGD is composed * of PTRS_PER_PGD pgd_t's and is the top level of the page table. */ -#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE -#define PGDIR_SIZE HV_PAGE_SIZE_LARGE +#define PGDIR_SHIFT HPAGE_SHIFT +#define PGDIR_SIZE HPAGE_SIZE #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) -#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) +#define PTRS_PER_PGD _HV_L1_ENTRIES(HPAGE_SHIFT) +#define PGD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT) +#define SIZEOF_PGD _HV_L1_SIZE(HPAGE_SHIFT) /* * The level-2 index is defined by the difference between the huge @@ -33,8 +34,9 @@ * Note that the hypervisor docs use PTE for what we call pte_t, so * this nomenclature is somewhat confusing. */ -#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) -#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) +#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT) +#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT) +#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) #ifndef __ASSEMBLY__ @@ -53,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* @@ -82,10 +76,12 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; /* We have no pmd or pud since we are strictly a two-level page table */ #include <asm-generic/pgtable-nopmd.h> +static inline int pud_huge_page(pud_t pud) { return 0; } + /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_INTRPT; + return addr >= MEM_HV_START; } /* @@ -111,24 +107,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return pte; } -static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) -{ - set_pte(&pmdp->pud.pgd, pmdval.pud.pgd); -} - -/* Create a pmd from a PTFN. */ -static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) -{ - return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } }; -} - -/* Return the page-table frame number (ptfn) that a pmd_t points at. */ -#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd) - -static inline void pmd_clear(pmd_t *pmdp) -{ - __pte_clear(&pmdp->pud.pgd); -} +/* + * pmds are wrappers around pgds, which are the same as ptes. + * It's often convenient to "cast" back and forth and use the pte methods, + * which are the methods supplied by the hypervisor. + */ +#define pmd_pte(pmd) ((pmd).pud.pgd) +#define pmdp_ptep(pmdp) (&(pmdp)->pud.pgd) +#define pte_pmd(pte) ((pmd_t){ { (pte) } }) #endif /* __ASSEMBLY__ */ diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index fd80328523b..2c8a9cd102d 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -21,17 +21,19 @@ #define PGDIR_SIZE HV_L1_SPAN #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD HV_L0_ENTRIES -#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) +#define PGD_INDEX(va) HV_L0_INDEX(va) +#define SIZEOF_PGD HV_L0_SIZE /* * The level-1 index is defined by the huge page size. A PMD is composed * of PTRS_PER_PMD pgd_t's and is the middle level of the page table. */ -#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE -#define PMD_SIZE HV_PAGE_SIZE_LARGE +#define PMD_SHIFT HPAGE_SHIFT +#define PMD_SIZE HPAGE_SIZE #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT)) -#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t)) +#define PTRS_PER_PMD _HV_L1_ENTRIES(HPAGE_SHIFT) +#define PMD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT) +#define SIZEOF_PMD _HV_L1_SIZE(HPAGE_SHIFT) /* * The level-2 index is defined by the difference between the huge @@ -40,25 +42,34 @@ * Note that the hypervisor docs use PTE for what we call pte_t, so * this nomenclature is somewhat confusing. */ -#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) -#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) +#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT) +#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT) +#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) /* - * Align the vmalloc area to an L2 page table, and leave a guard page - * at the beginning and end. The vmalloc code also puts in an internal + * Align the vmalloc area to an L2 page table. Omit guard pages at + * the beginning and end for simplicity (particularly in the per-cpu + * memory allocation code). The vmalloc code puts in an internal * guard page between each allocation. */ -#define _VMALLOC_END HUGE_VMAP_BASE -#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE) -#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE) - -#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) +#define _VMALLOC_END MEM_SV_START +#define VMALLOC_END _VMALLOC_END +#define VMALLOC_START _VMALLOC_START #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ #include <asm-generic/pgtable-nopud.h> +/* + * pmds are the same as pgds and ptes, so converting is a no-op. + */ +#define pmd_pte(pmd) (pmd) +#define pmdp_ptep(pmdp) (pmdp) +#define pte_pmd(pte) (pte) + +#define pud_pte(pud) ((pud).pgd) + static inline int pud_none(pud_t pud) { return pud_val(pud) == 0; @@ -69,6 +80,11 @@ static inline int pud_present(pud_t pud) return pud_val(pud) & _PAGE_PRESENT; } +static inline int pud_huge_page(pud_t pud) +{ + return pud_val(pud) & _PAGE_HUGE_PAGE; +} + #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -85,6 +101,9 @@ static inline int pud_bad(pud_t pud) /* Return the page-table frame number (ptfn) that a pud_t points at. */ #define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd) +/* Return the page frame number (pfn) that a pud_t points at. */ +#define pud_pfn(pud) pte_pfn(pud_pte(pud)) + /* * A given kernel pud_t maps to a kernel pmd_t table at a specific * virtual address. Since kernel pmd_t tables can be aligned at @@ -98,7 +117,7 @@ static inline int pud_bad(pud_t pud) * A pud_t points to a pmd_t array. Since we can have multiple per * page, we don't have a one-to-one mapping of pud_t's to pages. */ -#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud))) +#define pud_page(pud) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pud_ptfn(pud)))) static inline unsigned long pud_index(unsigned long address) { @@ -108,28 +127,6 @@ static inline unsigned long pud_index(unsigned long address) #define pmd_offset(pud, address) \ ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address)) -static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) -{ - set_pte(pmdp, pmdval); -} - -/* Create a pmd from a PTFN and pgprot. */ -static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) -{ - return hv_pte_set_ptfn(prot, ptfn); -} - -/* Return the page-table frame number (ptfn) that a pmd_t points at. */ -static inline unsigned long pmd_ptfn(pmd_t pmd) -{ - return hv_pte_get_ptfn(pmd); -} - -static inline void pmd_clear(pmd_t *pmdp) -{ - __pte_clear(pmdp); -} - /* Normalize an address to having the correct high bits set. */ #define pgd_addr_normalize pgd_addr_normalize static inline unsigned long pgd_addr_normalize(unsigned long addr) @@ -141,8 +138,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr) /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_START || - (addr > MEM_LOW_END && addr < MEM_HIGH_START); + return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr); } /* diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h new file mode 100644 index 00000000000..7ae3956d900 --- /dev/null +++ b/arch/tile/include/asm/pmc.h @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PMC_H +#define _ASM_TILE_PMC_H + +#include <linux/ptrace.h> + +#define TILE_BASE_COUNTERS 2 + +/* Bitfields below are derived from SPR PERF_COUNT_CTL*/ +#ifndef __tilegx__ +/* PERF_COUNT_CTL on TILEPro */ +#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */ +#define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */ +#define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */ + +#define TILE_SEL_MASK 0x7f /* 7 bits for event SEL, + COUNT_0_SEL */ +#define TILE_PLM_MASK 0x780 /* 4 bits priv level msks, + COUNT_0_MASK*/ +#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK) + +#else /* __tilegx__*/ +/* PERF_COUNT_CTL on TILEGx*/ +#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */ +#define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */ +#define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */ + +#define TILE_SEL_MASK 0x3f /* 6 bits for event SEL, + COUNT_0_SEL*/ +#define TILE_BOX_MASK 0x1c0 /* 3 bits box msks, + COUNT_0_BOX */ +#define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks, + COUNT_0_MASK */ +#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK) +#endif /* __tilegx__*/ + +/* Takes register and fault number. Returns error to disable the interrupt. */ +typedef int (*perf_irq_t)(struct pt_regs *, int); + +int userspace_perf_handler(struct pt_regs *regs, int fault); + +perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq); +void release_pmc_hardware(void); + +unsigned long pmc_get_overflow(void); +void pmc_ack_overflow(unsigned long status); + +void unmask_pmc_interrupts(void); +void mask_pmc_interrupts(void); + +#endif /* _ASM_TILE_PMC_H */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 34c1e01ffb5..42323636c45 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -15,6 +15,8 @@ #ifndef _ASM_TILE_PROCESSOR_H #define _ASM_TILE_PROCESSOR_H +#include <arch/chip.h> + #ifndef __ASSEMBLY__ /* @@ -25,7 +27,6 @@ #include <asm/ptrace.h> #include <asm/percpu.h> -#include <arch/chip.h> #include <arch/spr_def.h> struct task_struct; @@ -76,6 +77,17 @@ struct async_tlb { #ifdef CONFIG_HARDWALL struct hardwall_info; +struct hardwall_task { + /* Which hardwall is this task tied to? (or NULL if none) */ + struct hardwall_info *info; + /* Chains this task into the list at info->task_head. */ + struct list_head list; +}; +#ifdef __tilepro__ +#define HARDWALL_TYPES 1 /* udn */ +#else +#define HARDWALL_TYPES 3 /* udn, idn, and ipi */ +#endif #endif struct thread_struct { @@ -99,47 +111,38 @@ struct thread_struct { unsigned long long interrupt_mask; /* User interrupt-control 0 state */ unsigned long intctrl_0; -#if CHIP_HAS_PROC_STATUS_SPR() + /* Is this task currently doing a backtrace? */ + bool in_backtrace; /* Any other miscellaneous processor state bits */ unsigned long proc_status; -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() /* Interrupt base for PL0 interrupts */ unsigned long interrupt_vector_base; #endif -#if CHIP_HAS_TILE_RTF_HWM() /* Tile cache retry fifo high-water mark */ unsigned long tile_rtf_hwm; -#endif #if CHIP_HAS_DSTREAM_PF() /* Data stream prefetch control */ unsigned long dstream_pf; #endif #ifdef CONFIG_HARDWALL - /* Is this task tied to an activated hardwall? */ - struct hardwall_info *hardwall; - /* Chains this task into the list at hardwall->list. */ - struct list_head hardwall_list; + /* Hardwall information for various resources. */ + struct hardwall_task hardwall[HARDWALL_TYPES]; #endif #if CHIP_HAS_TILE_DMA() /* Async DMA TLB fault information */ struct async_tlb dma_async_tlb; #endif -#if CHIP_HAS_SN_PROC() - /* Was static network processor when we were switched out? */ - int sn_proc_running; - /* Async SNI TLB fault information */ - struct async_tlb sn_async_tlb; -#endif }; #endif /* !__ASSEMBLY__ */ /* * Start with "sp" this many bytes below the top of the kernel stack. - * This preserves the invariant that a called function may write to *sp. + * This allows us to be cache-aware when handling the initial save + * of the pt_regs value to the stack. */ -#define STACK_TOP_DELTA 8 +#define STACK_TOP_DELTA 64 /* * When entering the kernel via a fault, start with the top of the @@ -155,7 +158,7 @@ struct thread_struct { #ifndef __ASSEMBLY__ #ifdef __tilegx__ -#define TASK_SIZE_MAX (MEM_LOW_END + 1) +#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1)) #else #define TASK_SIZE_MAX PAGE_OFFSET #endif @@ -169,10 +172,10 @@ struct thread_struct { #define TASK_SIZE TASK_SIZE_MAX #endif -/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */ -#define VDSO_BASE (TASK_SIZE - PAGE_SIZE) +#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base) +#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x)) -#define STACK_TOP VDSO_BASE +#define STACK_TOP TASK_SIZE /* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */ #define STACK_TOP_MAX TASK_SIZE_MAX @@ -202,6 +205,7 @@ static inline void start_thread(struct pt_regs *regs, { regs->pc = pc; regs->sp = usp; + single_step_execve(); } /* Free all resources held by a thread. */ @@ -210,35 +214,40 @@ static inline void release_thread(struct task_struct *dead_task) /* Nothing for now */ } -/* Prepare to copy thread state - unlazy all lazy status. */ -#define prepare_to_copy(tsk) do { } while (0) - -extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); - extern int do_work_pending(struct pt_regs *regs, u32 flags); /* * Return saved (kernel) PC of a blocked thread. - * Only used in a printk() in kernel/sched.c, so don't work too hard. + * Only used in a printk() in kernel/sched/core.c, so don't work too hard. */ #define thread_saved_pc(t) ((t)->thread.pc) unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ -#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) +#define task_ksp0(task) \ + ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) /* Return some info about the user process TASK. */ -#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) #define task_pt_regs(task) \ - ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) +#define current_pt_regs() \ + ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ + STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) #define task_sp(task) (task_pt_regs(task)->sp) #define task_pc(task) (task_pt_regs(task)->pc) /* Aliases for pc and sp (used in fs/proc/array.c) */ #define KSTK_EIP(task) task_pc(task) #define KSTK_ESP(task) task_sp(task) +/* Fine-grained unaligned JIT support */ +#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + /* Standard format for printing registers and other word-size data. */ #ifdef __tilegx__ # define REGFMT "0x%016lx" @@ -267,7 +276,6 @@ extern char chip_model[64]; /* Data on which physical memory controller corresponds to which NUMA node. */ extern int node_controller[]; -#if CHIP_HAS_CBOX_HOME_MAP() /* Does the heap allocator return hash-for-home pages by default? */ extern int hash_default; @@ -277,11 +285,6 @@ extern int kstack_hash; /* Does MAP_ANONYMOUS return hash-for-home pages by default? */ #define uheap_hash hash_default -#else -#define hash_default 0 -#define kstack_hash 0 -#define uheap_hash 0 -#endif /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; @@ -329,7 +332,6 @@ extern int kdata_huge; /* * Provide symbolic constants for PLs. - * Note that assembly code assumes that USER_PL is zero. */ #define USER_PL 0 #if CONFIG_KERNEL_PL == 2 @@ -338,20 +340,38 @@ extern int kdata_huge; #define KERNEL_PL CONFIG_KERNEL_PL /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ -#define CPU_LOG_MASK_VALUE 12 -#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) -#if CONFIG_NR_CPUS > CPU_MASK_VALUE -# error Too many cpus! +#ifdef __tilegx__ +#define CPU_SHIFT 48 +#if CHIP_VA_WIDTH() > CPU_SHIFT +# error Too many VA bits! #endif +#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) + ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) + ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ + (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ + unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ + __ksp0 | __cpu; \ +}) +#else +#define LOG2_NR_CPU_IDS 6 +#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ - BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + BUG_ON(__ksp0 & MAX_CPU_ID); \ __ksp0 | __cpu; \ }) +#endif +#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) +# error Too many cpus! +#endif #endif /* _ASM_TILE_PROCESSOR_H */ diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index c6cddd7e8d5..b9620c077ab 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -11,87 +11,20 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_PTRACE_H #define _ASM_TILE_PTRACE_H -#include <arch/chip.h> -#include <arch/abi.h> - -/* These must match struct pt_regs, below. */ -#if CHIP_WORD_SIZE() == 32 -#define PTREGS_OFFSET_REG(n) ((n)*4) -#else -#define PTREGS_OFFSET_REG(n) ((n)*8) -#endif -#define PTREGS_OFFSET_BASE 0 -#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53) -#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54) -#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55) -#define PTREGS_NR_GPRS 56 -#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56) -#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57) -#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58) -#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59) -#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60) -#if CHIP_HAS_CMPEXCH() -#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61) -#endif -#define PTREGS_SIZE PTREGS_OFFSET_REG(64) +#include <linux/compiler.h> #ifndef __ASSEMBLY__ - -#ifdef __KERNEL__ /* Benefit from consistent use of "long" on all chips. */ typedef unsigned long pt_reg_t; -#else -/* Provide appropriate length type to userspace regardless of -m32/-m64. */ -typedef uint_reg_t pt_reg_t; -#endif - -/* - * This struct defines the way the registers are stored on the stack during a - * system call or exception. "struct sigcontext" has the same shape. - */ -struct pt_regs { - /* Saved main processor registers; 56..63 are special. */ - /* tp, sp, and lr must immediately follow regs[] for aliasing. */ - pt_reg_t regs[53]; - pt_reg_t tp; /* aliases regs[TREG_TP] */ - pt_reg_t sp; /* aliases regs[TREG_SP] */ - pt_reg_t lr; /* aliases regs[TREG_LR] */ - - /* Saved special registers. */ - pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ - pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ - pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ - pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ - pt_reg_t flags; /* flags (see below) */ -#if !CHIP_HAS_CMPEXCH() - pt_reg_t pad[3]; -#else - pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */ - pt_reg_t pad[2]; #endif -}; - -#endif /* __ASSEMBLY__ */ -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 +#include <uapi/asm/ptrace.h> -/* Support TILE-specific ptrace options, with events starting at 16. */ -#define PTRACE_O_TRACEMIGRATE 0x00010000 -#define PTRACE_EVENT_MIGRATE 16 -#ifdef __KERNEL__ #define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE) -#define PT_TRACE_MIGRATE 0x00080000 -#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE) -#endif - -#ifdef __KERNEL__ +#define PT_TRACE_MIGRATE PT_EVENT_FLAG(PTRACE_EVENT_MIGRATE) /* Flag bits in pt_regs.flags */ #define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */ @@ -100,17 +33,20 @@ struct pt_regs { #ifndef __ASSEMBLY__ +#define regs_return_value(regs) ((regs)->regs[0]) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->sp) /* Does the process account for user or for system time? */ -#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL) +#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL) /* Fill in a struct pt_regs with the current kernel registers. */ struct pt_regs *get_pt_regs(struct pt_regs *); /* Trace the current syscall. */ -extern void do_syscall_trace(void); +extern int do_syscall_trace_enter(struct pt_regs *regs); +extern void do_syscall_trace_exit(struct pt_regs *regs); #define arch_has_single_step() (1) @@ -144,8 +80,7 @@ extern void single_step_execve(void); struct task_struct; -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code); +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs); #ifdef __tilegx__ /* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ @@ -159,6 +94,4 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, #define SINGLESTEP_STATE_TARGET_LB 2 #define SINGLESTEP_STATE_TARGET_UB 7 -#endif /* !__KERNEL__ */ - #endif /* _ASM_TILE_PTRACE_H */ diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index d062d463fca..5d5d3b739a6 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -25,16 +25,22 @@ extern char _sinitdata[], _einitdata[]; /* Write-once data is writable only till the end of initialization. */ extern char __w1data_begin[], __w1data_end[]; +extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char vdso32_start[], vdso32_end[]; +#endif /* Not exactly sections, but PC comparison points in the code. */ extern char __rt_sigreturn[], __rt_sigreturn_end[]; -#ifndef __tilegx__ +#ifdef __tilegx__ +extern char __start_unalign_asm_code[], __end_unalign_asm_code[]; +#else extern char sys_cmpxchg[], __sys_cmpxchg_end[]; extern char __sys_cmpxchg_grab_lock[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; #endif -/* Handle the discontiguity between _sdata and _stext. */ +/* Handle the discontiguity between _sdata and _text. */ static inline int arch_is_kernel_data(unsigned long addr) { return addr >= (unsigned long)_sdata && diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index 7caf0f36b03..e98909033e5 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -11,26 +11,42 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_SETUP_H #define _ASM_TILE_SETUP_H -#define COMMAND_LINE_SIZE 2048 - -#ifdef __KERNEL__ #include <linux/pfn.h> #include <linux/init.h> +#include <uapi/asm/setup.h> /* * Reserved space for vmalloc and iomap - defined in asm/page.h */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) +int tile_console_write(const char *buf, int count); void early_panic(const char *fmt, ...); -void warn_early_printk(void); -void __init disable_early_printk(void); -#endif /* __KERNEL__ */ +/* Init-time routine to do tile-specific per-cpu setup. */ +void setup_cpu(int boot); + +/* User-level DMA management functions */ +void grant_dma_mpls(void); +void restrict_dma_mpls(void); + +#ifdef CONFIG_HARDWALL +/* User-level network management functions */ +void reset_network_state(void); +struct task_struct; +void hardwall_switch_tasks(struct task_struct *prev, struct task_struct *next); +void hardwall_deactivate_all(struct task_struct *task); +int hardwall_ipi_valid(int cpu); + +/* Hook hardwall code into changes in affinity. */ +#define arch_set_cpus_allowed(p, new_mask) do { \ + if (!cpumask_equal(&p->cpus_allowed, new_mask)) \ + hardwall_deactivate_all(p); \ +} while (0) +#endif #endif /* _ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index 1e5e49aad54..10e183de96d 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -11,19 +11,11 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_SIGNAL_H #define _ASM_TILE_SIGNAL_H -/* Do not notify a ptracer when this signal is handled. */ -#define SA_NOPTRACE 0x02000000u - -/* Used in earlier Tilera releases, so keeping for binary compatibility. */ -#define SA_RESTORER 0x04000000u - -#include <asm-generic/signal.h> +#include <uapi/asm/signal.h> -#if defined(__KERNEL__) #if !defined(__ASSEMBLY__) struct pt_regs; int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); @@ -34,6 +26,4 @@ void signal_fault(const char *type, struct pt_regs *, void trace_unhandled_signal(const char *type, struct pt_regs *regs, unsigned long address, int signo); #endif -#endif - #endif /* _ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h index 532124ae4b1..9a326b64f7a 100644 --- a/arch/tile/include/asm/smp.h +++ b/arch/tile/include/asm/smp.h @@ -43,10 +43,6 @@ void evaluate_message(int tag); /* Boot a secondary cpu */ void online_secondary(void); -/* Call a function on a specified set of CPUs (may include this one). */ -extern void on_each_cpu_mask(const struct cpumask *mask, - void (*func)(void *), void *info, bool wait); - /* Topology of the supervisor tile grid, and coordinates of boot processor */ extern HV_Topology smp_topology; @@ -91,9 +87,6 @@ void print_disabled_cpus(void); #else /* !CONFIG_SMP */ -#define on_each_cpu_mask(mask, func, info, wait) \ - do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0) - #define smp_master_cpu 0 #define smp_height 1 #define smp_width 1 @@ -108,10 +101,8 @@ void print_disabled_cpus(void); extern struct cpumask cpu_lotar_map; #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) -#if CHIP_HAS_CBOX_HOME_MAP() /* Which processors are used for hash-for-home mapping */ extern struct cpumask hash_for_home_map; -#endif /* Which cpus can have their cache flushed by hv_flush_remote(). */ extern struct cpumask cpu_cacheable_map; diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index a5e4208d34f..c0a77b38d39 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -19,7 +19,6 @@ #include <linux/atomic.h> #include <asm/page.h> -#include <asm/system.h> #include <linux/compiler.h> /* diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 72be5904e02..9a12b9c7e5d 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -27,7 +27,7 @@ * Return the "current" portion of a ticket lock value, * i.e. the number that currently owns the lock. */ -static inline int arch_spin_current(u32 val) +static inline u32 arch_spin_current(u32 val) { return val >> __ARCH_SPIN_CURRENT_SHIFT; } @@ -36,7 +36,7 @@ static inline int arch_spin_current(u32 val) * Return the "next" portion of a ticket lock value, * i.e. the number that the next task to try to acquire the lock will get. */ -static inline int arch_spin_next(u32 val) +static inline u32 arch_spin_next(u32 val) { return val & __ARCH_SPIN_NEXT_MASK; } @@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { __insn_mf(); - rw->lock = 0; + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */ } static inline int arch_read_trylock(arch_rwlock_t *rw) diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 4d97a2db932..0e9d382a2d4 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -25,7 +25,6 @@ struct KBacktraceIterator { BacktraceIterator it; struct task_struct *task; /* task we are backtracing */ - pte_t *pgtable; /* page table for user space access */ int end; /* iteration complete. */ int new_context; /* new context is starting */ int profile; /* profiling, so stop on async intrpt */ diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h index 7535cf1a30e..92b271bd9eb 100644 --- a/arch/tile/include/asm/string.h +++ b/arch/tile/include/asm/string.h @@ -21,8 +21,10 @@ #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_STRCHR #define __HAVE_ARCH_STRLEN +#define __HAVE_ARCH_STRNLEN extern __kernel_size_t strlen(const char *); +extern __kernel_size_t strnlen(const char *, __kernel_size_t); extern char *strchr(const char *s, int c); extern void *memchr(const void *s, int c, size_t n); extern void *memset(void *, int, __kernel_size_t); diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h new file mode 100644 index 00000000000..b8f888cbe6b --- /dev/null +++ b/arch/tile/include/asm/switch_to.h @@ -0,0 +1,79 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SWITCH_TO_H +#define _ASM_TILE_SWITCH_TO_H + +#include <arch/sim_def.h> + +/* + * switch_to(n) should switch tasks to task nr n, first + * checking that n isn't the current task, in which case it does nothing. + * The number of callee-saved registers saved on the kernel stack + * is defined here for use in copy_thread() and must agree with __switch_to(). + */ +#define CALLEE_SAVED_FIRST_REG 30 +#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */ + +#ifndef __ASSEMBLY__ + +struct task_struct; + +/* + * Pause the DMA engine and static network before task switching. + */ +#define prepare_arch_switch(next) _prepare_arch_switch(next) +void _prepare_arch_switch(struct task_struct *next); + +struct task_struct; +#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next))) +extern struct task_struct *_switch_to(struct task_struct *prev, + struct task_struct *next); + +/* Helper function for _switch_to(). */ +extern struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next, + unsigned long new_system_save_k_0); + +/* Address that switched-away from tasks are at. */ +extern unsigned long get_switch_to_pc(void); + +/* + * Kernel threads can check to see if they need to migrate their + * stack whenever they return from a context switch; for user + * threads, we defer until they are returning to user-space. + */ +#define finish_arch_switch(prev) do { \ + if (unlikely((prev)->state == TASK_DEAD)) \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ + ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ + (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + if (current->mm == NULL && !kstack_hash && \ + current_thread_info()->homecache_cpu != smp_processor_id()) \ + homecache_migrate_kthread(); \ +} while (0) + +/* Support function for forking a new task. */ +void ret_from_fork(void); + +/* Support function for forking a new kernel thread. */ +void ret_from_kernel_thread(void *fn, void *arg); + +/* Called from ret_from_xxx() when a new process starts up. */ +struct task_struct *sim_notify_fork(struct task_struct *prev); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_SWITCH_TO_H */ diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h index d35e0dcb67b..9644b88f133 100644 --- a/arch/tile/include/asm/syscall.h +++ b/arch/tile/include/asm/syscall.h @@ -22,6 +22,12 @@ #include <linux/err.h> #include <arch/abi.h> +/* The array of function pointers for syscalls. */ +extern void *sys_call_table[]; +#ifdef CONFIG_COMPAT +extern void *compat_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_r0 are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h index 3b5507c31ea..07b298450ef 100644 --- a/arch/tile/include/asm/syscalls.h +++ b/arch/tile/include/asm/syscalls.h @@ -24,12 +24,6 @@ #include <linux/types.h> #include <linux/compat.h> -/* The array of function pointers for syscalls. */ -extern void *sys_call_table[]; -#ifdef CONFIG_COMPAT -extern void *compat_sys_call_table[]; -#endif - /* * Note that by convention, any syscall which requires the current * register set takes an additional "struct pt_regs *" pointer; a @@ -43,15 +37,15 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi, u32 len, int advice); int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi, int advice); -long sys_flush_cache(void); +long sys_cacheflush(unsigned long addr, unsigned long len, + unsigned long flags); #ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */ #define sys_mmap sys_mmap #endif #ifndef __tilegx__ /* mm/fault.c */ -long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *); -long _sys_cmpxchg_badaddr(unsigned long address); +long sys_cmpxchg_badaddr(unsigned long address); #endif #ifdef CONFIG_COMPAT @@ -62,14 +56,14 @@ long sys_truncate64(const char __user *path, loff_t length); long sys_ftruncate64(unsigned int fd, loff_t length); #endif +/* Provide versions of standard syscalls that use current_pt_regs(). */ +long sys_rt_sigreturn(void); +#define sys_rt_sigreturn sys_rt_sigreturn + /* These are the intvec*.S trampolines. */ -long _sys_sigaltstack(const stack_t __user *, stack_t __user *); long _sys_rt_sigreturn(void); long _sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid); -long _sys_execve(const char __user *filename, - const char __user *const __user *argv, - const char __user *const __user *envp); #include <asm-generic/syscalls.h> diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h deleted file mode 100644 index 23d1842f483..00000000000 --- a/arch/tile/include/asm/system.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_SYSTEM_H -#define _ASM_TILE_SYSTEM_H - -#ifndef __ASSEMBLY__ - -#include <linux/types.h> -#include <linux/irqflags.h> - -/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */ -#include <asm/ptrace.h> - -#include <arch/chip.h> -#include <arch/sim_def.h> -#include <arch/spr_def.h> - -/* - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ - -#define read_barrier_depends() do { } while (0) - -#define __sync() __insn_mf() - -#if CHIP_HAS_SPLIT_CYCLE() -#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW) -#else -#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ -#endif - -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() -#include <hv/syscall_public.h> -/* - * Issue an uncacheable load to each memory controller, then - * wait until those loads have completed. - */ -static inline void __mb_incoherent(void) -{ - long clobber_r10; - asm volatile("swint2" - : "=R10" (clobber_r10) - : "R10" (HV_SYS_fence_incoherent) - : "r0", "r1", "r2", "r3", "r4", - "r5", "r6", "r7", "r8", "r9", - "r11", "r12", "r13", "r14", - "r15", "r16", "r17", "r18", "r19", - "r20", "r21", "r22", "r23", "r24", - "r25", "r26", "r27", "r28", "r29"); -} -#endif - -/* Fence to guarantee visibility of stores to incoherent memory. */ -static inline void -mb_incoherent(void) -{ - __insn_mf(); - -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() - { -#if CHIP_HAS_TILE_WRITE_PENDING() - const unsigned long WRITE_TIMEOUT_CYCLES = 400; - unsigned long start = get_cycles_low(); - do { - if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0) - return; - } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES); -#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ - (void) __mb_incoherent(); - } -#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */ -} - -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#define fast_iob() mb_incoherent() - -#define wmb() fast_wmb() -#define rmb() fast_rmb() -#define mb() fast_mb() -#define iob() fast_iob() - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#endif - -#define set_mb(var, value) \ - do { var = value; mb(); } while (0) - -/* - * Pause the DMA engine and static network before task switching. - */ -#define prepare_arch_switch(next) _prepare_arch_switch(next) -void _prepare_arch_switch(struct task_struct *next); - - -/* - * switch_to(n) should switch tasks to task nr n, first - * checking that n isn't the current task, in which case it does nothing. - * The number of callee-saved registers saved on the kernel stack - * is defined here for use in copy_thread() and must agree with __switch_to(). - */ -#endif /* !__ASSEMBLY__ */ -#define CALLEE_SAVED_FIRST_REG 30 -#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */ -#ifndef __ASSEMBLY__ -struct task_struct; -#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next))) -extern struct task_struct *_switch_to(struct task_struct *prev, - struct task_struct *next); - -/* Helper function for _switch_to(). */ -extern struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next, - unsigned long new_system_save_k_0); - -/* Address that switched-away from tasks are at. */ -extern unsigned long get_switch_to_pc(void); - -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible: - * - * TODO: fill this in! - */ -static inline void sched_cacheflush(void) -{ -} - -#define arch_align_stack(x) (x) - -/* - * Is the kernel doing fixups of unaligned accesses? If <0, no kernel - * intervention occurs and SIGBUS is delivered with no data address - * info. If 0, the kernel single-steps the instruction to discover - * the data address to provide with the SIGBUS. If 1, the kernel does - * a fixup. - */ -extern int unaligned_fixup; - -/* Is the kernel printing on each unaligned fixup? */ -extern int unaligned_printk; - -/* Number of unaligned fixups performed */ -extern unsigned int unaligned_fixup_count; - -/* Init-time routine to do tile-specific per-cpu setup. */ -void setup_cpu(int boot); - -/* User-level DMA management functions */ -void grant_dma_mpls(void); -void restrict_dma_mpls(void); - -#ifdef CONFIG_HARDWALL -/* User-level network management functions */ -void reset_network_state(void); -void grant_network_mpls(void); -void restrict_network_mpls(void); -int hardwall_deactivate(struct task_struct *task); - -/* Hook hardwall code into changes in affinity. */ -#define arch_set_cpus_allowed(p, new_mask) do { \ - if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \ - hardwall_deactivate(p); \ -} while (0) -#endif - -/* - * Kernel threads can check to see if they need to migrate their - * stack whenever they return from a context switch; for user - * threads, we defer until they are returning to user-space. - */ -#define finish_arch_switch(prev) do { \ - if (unlikely((prev)->state == TASK_DEAD)) \ - __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ - ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ - __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ - (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ - if (current->mm == NULL && !kstack_hash && \ - current_thread_info()->homecache_cpu != smp_processor_id()) \ - homecache_migrate_kthread(); \ -} while (0) - -/* Support function for forking a new task. */ -void ret_from_fork(void); - -/* Called from ret_from_fork() when a new process starts up. */ -struct task_struct *sim_notify_fork(struct task_struct *prev); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_TILE_SYSTEM_H */ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index bc4f562bd45..48e4fd0f38e 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -39,6 +39,11 @@ struct thread_info { struct restart_block restart_block; struct single_step_state *step_state; /* single step state (if non-zero) */ + int align_ctl; /* controls unaligned access */ +#ifdef __tilegx__ + unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */ + void __user *unalign_jit_base; /* unalign fixup JIT base */ +#endif }; /* @@ -56,6 +61,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ .step_state = NULL, \ + .align_ctl = 0, \ } #define init_thread_info (init_thread_union.thread_info) @@ -77,40 +83,36 @@ struct thread_info { #ifndef __ASSEMBLY__ +void arch_release_thread_info(struct thread_info *info); + /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); #define current_thread_info() \ ((struct thread_info *)(stack_pointer & -THREAD_SIZE)) -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node); -extern void free_thread_info(struct thread_info *info); - /* Sit on a nap instruction until interrupted. */ extern void smp_nap(void); -/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ +/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). */ extern void _cpu_idle(void); -/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ -extern void cpu_idle_on_new_stack(struct thread_info *old_ti, - unsigned long new_sp, - unsigned long new_ss10); - #else /* __ASSEMBLY__ */ -/* how to get the thread information struct from ASM */ +/* + * How to get the thread information struct from assembly. + * Note that we use different macros since different architectures + * have different semantics in their "mm" instruction and we would + * like to guarantee that the macro expands to exactly one instruction. + */ #ifdef __tilegx__ -#define GET_THREAD_INFO(reg) move reg, sp; mm reg, zero, LOG2_THREAD_SIZE, 63 +#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63 #else #define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31 #endif #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x10000000 - /* * Thread information flags that various assembly files may need to access. * Keep flags accessed frequently in low bits, particular since it makes @@ -126,6 +128,8 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #define TIF_SECCOMP 6 /* secure computing */ #define TIF_MEMDIE 7 /* OOM killer at work */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ +#define TIF_SYSCALL_TRACEPOINT 9 /* syscall tracepoint instrumentation */ +#define TIF_POLLING_NRFLAG 10 /* idle is polling for TIF_NEED_RESCHED */ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) @@ -136,12 +140,20 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_MEMDIE (1<<TIF_MEMDIE) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) /* Work to do on any return to user space. */ #define _TIF_ALLWORK_MASK \ (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\ _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME) +/* Work to do at syscall entry. */ +#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT) + +/* Work to do at syscall exit. */ +#define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT) + /* * Thread-synchronous status. * @@ -152,18 +164,31 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #ifdef __tilegx__ #define TS_COMPAT 0x0001 /* 32-bit compatibility mode */ #endif -#define TS_POLLING 0x0004 /* in idle loop but not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) { struct thread_info *ti = current_thread_info(); ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); + WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); +} +static inline void clear_restore_sigmask(void) +{ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; +} +static inline bool test_restore_sigmask(void) +{ + return current_thread_info()->status & TS_RESTORE_SIGMASK; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + if (!(ti->status & TS_RESTORE_SIGMASK)) + return false; + ti->status &= ~TS_RESTORE_SIGMASK; + return true; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h index 29921f0b86d..dc987d53e2a 100644 --- a/arch/tile/include/asm/timex.h +++ b/arch/tile/include/asm/timex.h @@ -29,11 +29,13 @@ typedef unsigned long long cycles_t; #if CHIP_HAS_SPLIT_CYCLE() cycles_t get_cycles(void); +#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW) #else static inline cycles_t get_cycles(void) { return __insn_mfspr(SPR_CYCLE); } +#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ #endif cycles_t get_clock_rate(void); diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h index 96199d214fb..dcf91b25a1e 100644 --- a/arch/tile/include/asm/tlbflush.h +++ b/arch/tile/include/asm/tlbflush.h @@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid); /* The hypervisor tells us what ASIDs are available to us. */ extern int min_asid, max_asid; -static inline unsigned long hv_page_size(const struct vm_area_struct *vma) -{ - return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE; -} - /* Pass as vma pointer for non-executable mapping, if no vma available. */ -#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL) +#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL) /* Flush a single user page on this cpu. */ -static inline void local_flush_tlb_page(const struct vm_area_struct *vma, +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr, unsigned long page_size) { @@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma, } /* Flush range of user pages on this cpu. */ -static inline void local_flush_tlb_pages(const struct vm_area_struct *vma, +static inline void local_flush_tlb_pages(struct vm_area_struct *vma, unsigned long addr, unsigned long page_size, unsigned long len) @@ -117,10 +112,10 @@ extern void flush_tlb_all(void); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_current_task(void); extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(const struct vm_area_struct *, unsigned long); -extern void flush_tlb_page_mm(const struct vm_area_struct *, +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_page_mm(struct vm_area_struct *, struct mm_struct *, unsigned long); -extern void flush_tlb_range(const struct vm_area_struct *, +extern void flush_tlb_range(struct vm_area_struct *, unsigned long start, unsigned long end); #define flush_tlb() flush_tlb_current_task() diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 6fdd0c86019..93831184423 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -44,66 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node) /* For now, use numa node -1 for global allocation. */ #define pcibus_to_node(bus) ((void)(bus), -1) -/* - * TILE architecture has many cores integrated in one processor, so we need - * setup bigger balance_interval for both CPU/NODE scheduling domains to - * reduce process scheduling costs. - */ - -/* sched_domains SD_CPU_INIT for TILE architecture */ -#define SD_CPU_INIT (struct sched_domain) { \ - .min_interval = 4, \ - .max_interval = 128, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 2, \ - .idle_idx = 1, \ - .newidle_idx = 0, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ - \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 0*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ - | 0*SD_SHARE_CPUPOWER \ - | 0*SD_SHARE_PKG_RESOURCES \ - | 0*SD_SERIALIZE \ - , \ - .last_balance = jiffies, \ - .balance_interval = 32, \ -} - -/* sched_domains SD_NODE_INIT for TILE architecture */ -#define SD_NODE_INIT (struct sched_domain) { \ - .min_interval = 16, \ - .max_interval = 512, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ - .wake_idx = 1, \ - .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ - | 1*SD_BALANCE_EXEC \ - | 1*SD_BALANCE_FORK \ - | 0*SD_BALANCE_WAKE \ - | 0*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ - | 0*SD_SHARE_CPUPOWER \ - | 0*SD_SHARE_PKG_RESOURCES \ - | 1*SD_SERIALIZE \ - , \ - .last_balance = jiffies, \ - .balance_interval = 128, \ -} - /* By definition, we create nodes based on online memory. */ #define node_has_online_mem(nid) 1 @@ -116,9 +56,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define topology_core_id(cpu) (cpu) #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) #define topology_thread_cpumask(cpu) cpumask_of(cpu) - -/* indicates that pointers to the topology struct cpumask maps are valid */ -#define arch_provides_topology_pointers yes #endif #endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f20f920f93..4b99a1c3aab 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -15,12 +15,13 @@ #ifndef _ASM_TILE_TRAPS_H #define _ASM_TILE_TRAPS_H +#ifndef __ASSEMBLY__ #include <arch/chip.h> /* mm/fault.c */ void do_page_fault(struct pt_regs *, int fault_num, unsigned long address, unsigned long write); -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() void do_async_page_fault(struct pt_regs *); #endif @@ -64,7 +65,21 @@ void do_breakpoint(struct pt_regs *, int fault_num); #ifdef __tilegx__ +/* kernel/single_step.c */ void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); + +/* Handle unalign data fixup. */ +extern void do_unaligned(struct pt_regs *regs, int vecnum); +#endif + +#endif /* __ASSEMBLY__ */ + +#ifdef __tilegx__ +/* 128 byte JIT per unalign fixup. */ +#define UNALIGN_JIT_SHIFT 7 #endif -#endif /* _ASM_TILE_SYSCALLS_H */ +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index ef34d2caa5b..b6cde3209b9 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -114,45 +114,79 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs); /* - * We return the __get_user_N function results in a structure, - * thus in r0 and r1. If "err" is zero, "val" is the result - * of the read; otherwise, "err" is -EFAULT. - * - * We rarely need 8-byte values on a 32-bit architecture, but - * we size the structure to accommodate. In practice, for the - * the smaller reads, we can zero the high word for free, and - * the caller will ignore it by virtue of casting anyway. + * Support macros for __get_user(). + * + * Implementation note: The "case 8" logic of casting to the type of + * the result of subtracting the value from itself is basically a way + * of keeping all integer types the same, but casting any pointers to + * ptrdiff_t, i.e. also an integer type. This way there are no + * questionable casts seen by the compiler on an ILP32 platform. + * + * Note that __get_user() and __put_user() assume proper alignment. */ -struct __get_user { - unsigned long long val; - int err; -}; -/* - * FIXME: we should express these as inline extended assembler, since - * they're fundamentally just a variable dereference and some - * supporting exception_table gunk. Note that (a la i386) we can - * extend the copy_to_user and copy_from_user routines to call into - * such extended assembler routines, though we will have to use a - * different return code in that case (1, 2, or 4, rather than -EFAULT). - */ -extern struct __get_user __get_user_1(const void __user *); -extern struct __get_user __get_user_2(const void __user *); -extern struct __get_user __get_user_4(const void __user *); -extern struct __get_user __get_user_8(const void __user *); -extern int __put_user_1(long, void __user *); -extern int __put_user_2(long, void __user *); -extern int __put_user_4(long, void __user *); -extern int __put_user_8(long long, void __user *); - -/* Unimplemented routines to cause linker failures */ -extern struct __get_user __get_user_bad(void); -extern int __put_user_bad(void); +#ifdef __LP64__ +#define _ASM_PTR ".quad" +#define _ASM_ALIGN ".align 8" +#else +#define _ASM_PTR ".long" +#define _ASM_ALIGN ".align 4" +#endif + +#define __get_user_asm(OP, x, ptr, ret) \ + asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %1, 0; movei %0, %3 }\n" \ + "j 9f\n" \ + ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (x) \ + : "r" (ptr), "i" (-EFAULT)) + +#ifdef __tilegx__ +#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret) +#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret) +#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret) +#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret) +#else +#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret) +#define __get_user_2(x, ptr, ret) __get_user_asm(lh_u, x, ptr, ret) +#define __get_user_4(x, ptr, ret) __get_user_asm(lw, x, ptr, ret) +#ifdef __LITTLE_ENDIAN +#define __lo32(a, b) a +#define __hi32(a, b) b +#else +#define __lo32(a, b) b +#define __hi32(a, b) a +#endif +#define __get_user_8(x, ptr, ret) \ + ({ \ + unsigned int __a, __b; \ + asm volatile("1: { lw %1, %3; addi %2, %3, 4 }\n" \ + "2: { lw %2, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %1, 0; movei %2, 0 }\n" \ + "{ movei %0, %4; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".word 1b, 0b\n" \ + ".word 2b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (__a), "=&r" (__b) \ + : "r" (ptr), "i" (-EFAULT)); \ + (x) = (__typeof(x))(__typeof((x)-(x))) \ + (((u64)__hi32(__a, __b) << 32) | \ + __lo32(__a, __b)); \ + }) +#endif + +extern int __get_user_bad(void) + __attribute__((warning("sizeof __get_user argument not 1, 2, 4 or 8"))); -/* - * Careful: we have to cast the result to the type of the pointer - * for sign reasons. - */ /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -174,30 +208,64 @@ extern int __put_user_bad(void); * function. */ #define __get_user(x, ptr) \ -({ struct __get_user __ret; \ - __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \ - __chk_user_ptr(__gu_addr); \ - switch (sizeof(*(__gu_addr))) { \ - case 1: \ - __ret = __get_user_1(__gu_addr); \ - break; \ - case 2: \ - __ret = __get_user_2(__gu_addr); \ - break; \ - case 4: \ - __ret = __get_user_4(__gu_addr); \ - break; \ - case 8: \ - __ret = __get_user_8(__gu_addr); \ - break; \ - default: \ - __ret = __get_user_bad(); \ - break; \ - } \ - (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \ - __ret.val; \ - __ret.err; \ -}) + ({ \ + int __ret; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: __get_user_1(x, ptr, __ret); break; \ + case 2: __get_user_2(x, ptr, __ret); break; \ + case 4: __get_user_4(x, ptr, __ret); break; \ + case 8: __get_user_8(x, ptr, __ret); break; \ + default: __ret = __get_user_bad(); break; \ + } \ + __ret; \ + }) + +/* Support macros for __put_user(). */ + +#define __put_user_asm(OP, x, ptr, ret) \ + asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %3; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret) \ + : "r" (ptr), "r" (x), "i" (-EFAULT)) + +#ifdef __tilegx__ +#define __put_user_1(x, ptr, ret) __put_user_asm(st1, x, ptr, ret) +#define __put_user_2(x, ptr, ret) __put_user_asm(st2, x, ptr, ret) +#define __put_user_4(x, ptr, ret) __put_user_asm(st4, x, ptr, ret) +#define __put_user_8(x, ptr, ret) __put_user_asm(st, x, ptr, ret) +#else +#define __put_user_1(x, ptr, ret) __put_user_asm(sb, x, ptr, ret) +#define __put_user_2(x, ptr, ret) __put_user_asm(sh, x, ptr, ret) +#define __put_user_4(x, ptr, ret) __put_user_asm(sw, x, ptr, ret) +#define __put_user_8(x, ptr, ret) \ + ({ \ + u64 __x = (__typeof((x)-(x)))(x); \ + int __lo = (int) __x, __hi = (int) (__x >> 32); \ + asm volatile("1: { sw %1, %2; addi %0, %1, 4 }\n" \ + "2: { sw %0, %3; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %4; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".word 1b, 0b\n" \ + ".word 2b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=&r" (ret) \ + : "r" (ptr), "r" (__lo32(__lo, __hi)), \ + "r" (__hi32(__lo, __hi)), "i" (-EFAULT)); \ + }) +#endif + +extern int __put_user_bad(void) + __attribute__((warning("sizeof __put_user argument not 1, 2, 4 or 8"))); /** * __put_user: - Write a simple value into user space, with less checking. @@ -217,39 +285,19 @@ extern int __put_user_bad(void); * function. * * Returns zero on success, or -EFAULT on error. - * - * Implementation note: The "case 8" logic of casting to the type of - * the result of subtracting the value from itself is basically a way - * of keeping all integer types the same, but casting any pointers to - * ptrdiff_t, i.e. also an integer type. This way there are no - * questionable casts seen by the compiler on an ILP32 platform. */ #define __put_user(x, ptr) \ ({ \ - int __pu_err = 0; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - typeof(*__pu_addr) __pu_val = (x); \ - __chk_user_ptr(__pu_addr); \ - switch (sizeof(__pu_val)) { \ - case 1: \ - __pu_err = __put_user_1((long)__pu_val, __pu_addr); \ - break; \ - case 2: \ - __pu_err = __put_user_2((long)__pu_val, __pu_addr); \ - break; \ - case 4: \ - __pu_err = __put_user_4((long)__pu_val, __pu_addr); \ - break; \ - case 8: \ - __pu_err = \ - __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\ - __pu_addr); \ - break; \ - default: \ - __pu_err = __put_user_bad(); \ - break; \ + int __ret; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: __put_user_1(x, ptr, __ret); break; \ + case 2: __put_user_2(x, ptr, __ret); break; \ + case 4: __put_user_4(x, ptr, __ret); break; \ + case 8: __put_user_8(x, ptr, __ret); break; \ + default: __ret = __put_user_bad(); break; \ } \ - __pu_err; \ + __ret; \ }) /* @@ -353,7 +401,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_COPY_FROM_USER +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +/* + * There are still unprovable places in the generic code as of 2.6.34, so this + * option is not really compatible with -Werror, which is more useful in + * general. + */ extern void copy_from_user_overflow(void) __compiletime_warning("copy_from_user() size is not provably correct"); @@ -378,7 +431,7 @@ static inline unsigned long __must_check copy_from_user(void *to, /** * __copy_in_user() - copy data within user space, with less checking. * @to: Destination address, in user space. - * @from: Source address, in kernel space. + * @from: Source address, in user space. * @n: Number of bytes to copy. * * Context: User context only. This function may sleep. @@ -395,7 +448,7 @@ extern unsigned long __copy_in_user_inatomic( static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); return __copy_in_user_inatomic(to, from, n); } @@ -520,37 +573,6 @@ static inline unsigned long __must_check flush_user( } /** - * inv_user: - Invalidate a block of memory in user space from cache. - * @mem: Destination address, in user space. - * @len: Number of bytes to invalidate. - * - * Returns number of bytes that could not be invalidated. - * On success, this will be zero. - * - * Note that on Tile64, the "inv" operation is in fact a - * "flush and invalidate", so cache write-backs will occur prior - * to the cache being marked invalid. - */ -extern unsigned long inv_user_asm(void __user *mem, unsigned long len); -static inline unsigned long __must_check __inv_user( - void __user *mem, unsigned long len) -{ - int retval; - - might_fault(); - retval = inv_user_asm(mem, len); - mb_incoherent(); - return retval; -} -static inline unsigned long __must_check inv_user( - void __user *mem, unsigned long len) -{ - if (access_ok(VERIFY_WRITE, mem, len)) - return __inv_user(mem, len); - return len; -} - -/** * finv_user: - Flush-inval a block of memory in user space from cache. * @mem: Destination address, in user space. * @len: Number of bytes to invalidate. diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h index 137e2de5b10..5a58a0d1144 100644 --- a/arch/tile/include/asm/unaligned.h +++ b/arch/tile/include/asm/unaligned.h @@ -15,10 +15,29 @@ #ifndef _ASM_TILE_UNALIGNED_H #define _ASM_TILE_UNALIGNED_H -#include <linux/unaligned/le_struct.h> -#include <linux/unaligned/be_byteshift.h> -#include <linux/unaligned/generic.h> -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le +/* + * We could implement faster get_unaligned_[be/le]64 using the ldna + * instruction on tilegx; however, we need to either copy all of the + * other generic functions to here (which is pretty ugly) or else + * modify both the generic code and other arch code to allow arch + * specific unaligned data access functions. Given these functions + * are not often called, we'll stick with the generic version. + */ +#include <asm-generic/unaligned.h> + +/* + * Is the kernel doing fixups of unaligned accesses? If <0, no kernel + * intervention occurs and SIGBUS is delivered with no data address + * info. If 0, the kernel single-steps the instruction to discover + * the data address to provide with the SIGBUS. If 1, the kernel does + * a fixup. + */ +extern int unaligned_fixup; + +/* Is the kernel printing on each unaligned fixup? */ +extern int unaligned_printk; + +/* Number of unaligned fixups performed */ +extern unsigned int unaligned_fixup_count; #endif /* _ASM_TILE_UNALIGNED_H */ diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index f70bf1c541f..940831fe9e9 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -11,37 +11,10 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - -#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL) -#define _ASM_TILE_UNISTD_H - -#if !defined(__LP64__) || defined(__SYSCALL_COMPAT) -/* Use the flavor of this syscall that matches the 32-bit API better. */ -#define __ARCH_WANT_SYNC_FILE_RANGE2 -#endif - -/* Use the standard ABI for syscalls. */ -#include <asm-generic/unistd.h> - -/* Additional Tilera-specific syscalls. */ -#define __NR_flush_cache (__NR_arch_specific_syscall + 1) -__SYSCALL(__NR_flush_cache, sys_flush_cache) - -#ifndef __tilegx__ -/* "Fast" syscalls provide atomic support for 32-bit chips. */ -#define __NR_FAST_cmpxchg -1 -#define __NR_FAST_atomic_update -2 -#define __NR_FAST_cmpxchg64 -3 -#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0) -__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) -#endif - -#ifdef __KERNEL__ /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */ #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK #endif #define __ARCH_WANT_SYS_NEWFSTATAT -#endif - -#endif /* _ASM_TILE_UNISTD_H */ +#define __ARCH_WANT_SYS_CLONE +#include <uapi/asm/unistd.h> diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h new file mode 100644 index 00000000000..9f6a78d665f --- /dev/null +++ b/arch/tile/include/asm/vdso.h @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __TILE_VDSO_H__ +#define __TILE_VDSO_H__ + +#include <linux/types.h> + +/* + * Note about the vdso_data structure: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tz_update_count; /* Timezone atomicity ctr */ + __u64 tb_update_count; /* Timebase atomicity ctr */ + __u64 xtime_tod_stamp; /* TOD clock for xtime */ + __u64 xtime_clock_sec; /* Kernel time second */ + __u64 xtime_clock_nsec; /* Kernel time nanosecond */ + __u64 wtom_clock_sec; /* Wall to monotonic clock second */ + __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */ + __u32 mult; /* Cycle to nanosecond multiplier */ + __u32 shift; /* Cycle to nanosecond divisor (power of two) */ + __u32 tz_minuteswest; /* Minutes west of Greenwich */ + __u32 tz_dsttime; /* Type of dst correction */ +}; + +extern struct vdso_data *vdso_data; + +/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */ +extern void __vdso_rt_sigreturn(void); + +extern int setup_vdso_pages(void); + +#endif /* __TILE_VDSO_H__ */ diff --git a/arch/tile/include/gxio/common.h b/arch/tile/include/gxio/common.h new file mode 100644 index 00000000000..724595a24d0 --- /dev/null +++ b/arch/tile/include/gxio/common.h @@ -0,0 +1,40 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_COMMON_H_ +#define _GXIO_COMMON_H_ + +/* + * Routines shared between the various GXIO device components. + */ + +#include <hv/iorpc.h> + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/io.h> + +/* Define the standard gxio MMIO functions using kernel functions. */ +#define __gxio_mmio_read8(addr) readb(addr) +#define __gxio_mmio_read16(addr) readw(addr) +#define __gxio_mmio_read32(addr) readl(addr) +#define __gxio_mmio_read64(addr) readq(addr) +#define __gxio_mmio_write8(addr, val) writeb((val), (addr)) +#define __gxio_mmio_write16(addr, val) writew((val), (addr)) +#define __gxio_mmio_write32(addr, val) writel((val), (addr)) +#define __gxio_mmio_write64(addr, val) writeq((val), (addr)) +#define __gxio_mmio_read(addr) __gxio_mmio_read64(addr) +#define __gxio_mmio_write(addr, val) __gxio_mmio_write64((addr), (val)) + +#endif /* !_GXIO_COMMON_H_ */ diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h new file mode 100644 index 00000000000..b9e45e37649 --- /dev/null +++ b/arch/tile/include/gxio/dma_queue.h @@ -0,0 +1,161 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_DMA_QUEUE_H_ +#define _GXIO_DMA_QUEUE_H_ + +/* + * DMA queue management APIs shared between TRIO and mPIPE. + */ + +#include <gxio/common.h> + +/* The credit counter lives in the high 32 bits. */ +#define DMA_QUEUE_CREDIT_SHIFT 32 + +/* + * State object that tracks a DMA queue's head and tail indices, as + * well as the number of commands posted and completed. The + * structure is accessed via a thread-safe, lock-free algorithm. + */ +typedef struct { + /* + * Address of a MPIPE_EDMA_POST_REGION_VAL_t, + * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t + * register. These register have identical encodings and provide + * information about how many commands have been processed. + */ + void *post_region_addr; + + /* + * A lazily-updated count of how many edescs the hardware has + * completed. + */ + uint64_t hw_complete_count __attribute__ ((aligned(64))); + + /* + * High 32 bits are a count of available egress command credits, + * low 24 bits are the next egress "slot". + */ + int64_t credits_and_next_index; + +} __gxio_dma_queue_t; + +/* Initialize a dma queue. */ +extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, + void *post_region_addr, + unsigned int num_entries); + +/* + * Update the "credits_and_next_index" and "hw_complete_count" fields + * based on pending hardware completions. Note that some other thread + * may have already done this and, importantly, may still be in the + * process of updating "credits_and_next_index". + */ +extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue); + +/* Wait for credits to become available. */ +extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, + int64_t modifier); + +/* Reserve slots in the queue, optionally waiting for slots to become + * available, and optionally returning a "completion_slot" suitable for + * direct comparison to "hw_complete_count". + */ +static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, + unsigned int num, bool wait, + bool completion) +{ + uint64_t slot; + + /* + * Try to reserve 'num' egress command slots. We do this by + * constructing a constant that subtracts N credits and adds N to + * the index, and using fetchaddgez to only apply it if the credits + * count doesn't go negative. + */ + int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num; + int64_t old = + __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + + if (unlikely(old + modifier < 0)) { + /* + * We're out of credits. Try once to get more by checking for + * completed egress commands. If that fails, wait or fail. + */ + __gxio_dma_queue_update_credits(dma_queue); + old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + if (old + modifier < 0) { + if (wait) + old = __gxio_dma_queue_wait_for_credits + (dma_queue, modifier); + else + return GXIO_ERR_DMA_CREDITS; + } + } + + /* The bottom 24 bits of old encode the "slot". */ + slot = (old & 0xffffff); + + if (completion) { + /* + * A "completion_slot" is a "slot" which can be compared to + * "hw_complete_count" at any time in the future. To convert + * "slot" into a "completion_slot", we access "hw_complete_count" + * once (knowing that we have reserved a slot, and thus, it will + * be "basically" accurate), and combine its high 40 bits with + * the 24 bit "slot", and handle "wrapping" by adding "1 << 24" + * if the result is LESS than "hw_complete_count". + */ + uint64_t complete; + complete = ACCESS_ONCE(dma_queue->hw_complete_count); + slot |= (complete & 0xffffffffff000000); + if (slot < complete) + slot += 0x1000000; + } + + /* + * If any of our slots mod 256 were equivalent to 0, go ahead and + * collect some egress credits, and update "hw_complete_count", and + * make sure the index doesn't overflow into the credits. + */ + if (unlikely(((old + num) & 0xff) < num)) { + __gxio_dma_queue_update_credits(dma_queue); + + /* Make sure the index doesn't overflow into the credits. */ +#ifdef __BIG_ENDIAN__ + *(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0; +#else + *(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0; +#endif + } + + return slot; +} + +/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */ +extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, + unsigned int num, int wait); + +/* Check whether a particular "completion slot" has completed. + * + * Note that this function requires a "completion slot", and thus + * cannot be used with the result of any "reserve_fast" function. + */ +extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, + int64_t completion_slot, int update); + +#endif /* !_GXIO_DMA_QUEUE_H_ */ diff --git a/arch/tile/include/gxio/iorpc_globals.h b/arch/tile/include/gxio/iorpc_globals.h new file mode 100644 index 00000000000..52c721f8dad --- /dev/null +++ b/arch/tile/include/gxio/iorpc_globals.h @@ -0,0 +1,38 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __IORPC_LINUX_RPC_H__ +#define __IORPC_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define IORPC_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000) +#define IORPC_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001) +#define IORPC_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define IORPC_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int __iorpc_arm_pollfd(int fd, int pollfd_cookie); + +int __iorpc_close_pollfd(int fd, int pollfd_cookie); + +int __iorpc_get_mmio_base(int fd, HV_PTE *base); + +int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size); + +#endif /* !__IORPC_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h new file mode 100644 index 00000000000..4cda03de734 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -0,0 +1,144 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_MPIPE_LINUX_RPC_H__ +#define __GXIO_MPIPE_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_mpipe_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/mpipe.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1200) +#define GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1201) + +#define GXIO_MPIPE_OP_ALLOC_NOTIF_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1203) +#define GXIO_MPIPE_OP_INIT_NOTIF_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1204) +#define GXIO_MPIPE_OP_REQUEST_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1205) +#define GXIO_MPIPE_OP_ENABLE_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1206) +#define GXIO_MPIPE_OP_ALLOC_NOTIF_GROUPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1207) +#define GXIO_MPIPE_OP_INIT_NOTIF_GROUP IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1208) +#define GXIO_MPIPE_OP_ALLOC_BUCKETS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1209) +#define GXIO_MPIPE_OP_INIT_BUCKET IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120a) +#define GXIO_MPIPE_OP_ALLOC_EDMA_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120b) +#define GXIO_MPIPE_OP_INIT_EDMA_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x120c) + +#define GXIO_MPIPE_OP_COMMIT_RULES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120f) +#define GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1210) +#define GXIO_MPIPE_OP_LINK_OPEN_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1211) +#define GXIO_MPIPE_OP_LINK_CLOSE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1212) +#define GXIO_MPIPE_OP_LINK_SET_ATTR_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1213) + +#define GXIO_MPIPE_OP_GET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121e) +#define GXIO_MPIPE_OP_SET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121f) +#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1220) +#define GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1221) +#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1222) +#define GXIO_MPIPE_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000) +#define GXIO_MPIPE_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001) +#define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, + void *mem_va, size_t mem_size, + unsigned int mem_flags, unsigned int stack, + unsigned int buffer_size_enum); + + +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring); + +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, + int inter_x, int inter_y, + int inter_ipi, int inter_event, + unsigned int ring); + +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, + unsigned int ring); + +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, + unsigned int group, + gxio_mpipe_notif_group_bits_t bits); + +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, + unsigned int first, unsigned int flags); + +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, + MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); + +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring, unsigned int channel); + + +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, + size_t blob_size); + +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, + unsigned int iotlb, HV_PTE pte, + unsigned int flags); + +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, + _gxio_mpipe_link_name_t name, unsigned int flags); + +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); + +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, + uint32_t attr, int64_t val); + +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); + +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, + uint64_t nsec, uint64_t cycles); + +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, + int64_t nsec); + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, + int32_t ppb); + +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); + +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); + +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); + +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h new file mode 100644 index 00000000000..f0b04284468 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -0,0 +1,50 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_MPIPE_INFO_LINUX_RPC_H__ +#define __GXIO_MPIPE_INFO_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_mpipe_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/mpipe.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + + +#define GXIO_MPIPE_INFO_OP_INSTANCE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1250) +#define GXIO_MPIPE_INFO_OP_ENUMERATE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1251) +#define GXIO_MPIPE_INFO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + + +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, + _gxio_mpipe_link_name_t name); + +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, + unsigned int idx, + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); + +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, + HV_PTE *base); + +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h new file mode 100644 index 00000000000..376a4f77116 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -0,0 +1,104 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_TRIO_LINUX_RPC_H__ +#define __GXIO_TRIO_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_trio_intf.h> +#include <gxio/trio.h> +#include <gxio/kiorpc.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) + +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) + +#define GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) + +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) + +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) +#define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, + unsigned int first, unsigned int flags); + + +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + + +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, + unsigned int pio_region, unsigned int mac, + uint32_t bus_address_hi, unsigned int flags); + + +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, + unsigned int map, unsigned long va, + uint64_t size, unsigned int asid, + unsigned int mac, uint64_t bus_address, + unsigned int node, + unsigned int order_mode); + +int gxio_trio_get_port_property(gxio_trio_context_t *context, + struct pcie_trio_ports_property *trio_ports); + +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int intx); + +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int mem_map, + uint64_t mem_map_base, uint64_t mem_map_limit, + unsigned int asid); + + +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, + uint16_t mrs, unsigned int mac); + +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); + +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); + +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); + +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_uart.h b/arch/tile/include/gxio/iorpc_uart.h new file mode 100644 index 00000000000..55429d48ea5 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_uart.h @@ -0,0 +1,40 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_UART_LINUX_RPC_H__ +#define __GXIO_UART_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_uart_intf.h> +#include <gxio/uart.h> +#include <gxio/kiorpc.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_UART_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1900) +#define GXIO_UART_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_UART_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event); + +int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base); + +int gxio_uart_check_mmio_offset(gxio_uart_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_UART_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h new file mode 100644 index 00000000000..79962a97de8 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -0,0 +1,46 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_USB_HOST_LINUX_RPC_H__ +#define __GXIO_USB_HOST_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_usb_host_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/usb_host.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_USB_HOST_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1800) +#define GXIO_USB_HOST_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1801) +#define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event); + +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, + HV_PTE pte, unsigned int flags); + +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, + HV_PTE *base); + +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/kiorpc.h b/arch/tile/include/gxio/kiorpc.h new file mode 100644 index 00000000000..ee5820979ff --- /dev/null +++ b/arch/tile/include/gxio/kiorpc.h @@ -0,0 +1,29 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support routines for kernel IORPC drivers. + */ + +#ifndef _GXIO_KIORPC_H +#define _GXIO_KIORPC_H + +#include <linux/types.h> +#include <asm/page.h> +#include <arch/chip.h> + +#if CHIP_HAS_MMIO() +void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset, + unsigned long size); +#endif + +#endif /* _GXIO_KIORPC_H */ diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h new file mode 100644 index 00000000000..e37cf4f0cff --- /dev/null +++ b/arch/tile/include/gxio/mpipe.h @@ -0,0 +1,1871 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_MPIPE_H_ +#define _GXIO_MPIPE_H_ + +/* + * + * An API for allocating, configuring, and manipulating mPIPE hardware + * resources. + */ + +#include <gxio/common.h> +#include <gxio/dma_queue.h> + +#include <linux/time.h> + +#include <arch/mpipe_def.h> +#include <arch/mpipe_shm.h> + +#include <hv/drv_mpipe_intf.h> +#include <hv/iorpc.h> + +/* + * + * The TILE-Gx mPIPE&tm; shim provides Ethernet connectivity, packet + * classification, and packet load balancing services. The + * gxio_mpipe_ API, declared in <gxio/mpipe.h>, allows applications to + * allocate mPIPE IO channels, configure packet distribution + * parameters, and send and receive Ethernet packets. The API is + * designed to be a minimal wrapper around the mPIPE hardware, making + * system calls only where necessary to preserve inter-process + * protection guarantees. + * + * The APIs described below allow the programmer to allocate and + * configure mPIPE resources. As described below, the mPIPE is a + * single shared hardware device that provides partitionable resources + * that are shared between all applications in the system. The + * gxio_mpipe_ API allows userspace code to make resource request + * calls to the hypervisor, which in turns keeps track of the + * resources in use by all applications, maintains protection + * guarantees, and resets resources upon application shutdown. + * + * We strongly recommend reading the mPIPE section of the IO Device + * Guide (UG404) before working with this API. Most functions in the + * gxio_mpipe_ API are directly analogous to hardware interfaces and + * the documentation assumes that the reader understands those + * hardware interfaces. + * + * @section mpipe__ingress mPIPE Ingress Hardware Resources + * + * The mPIPE ingress hardware provides extensive hardware offload for + * tasks like packet header parsing, load balancing, and memory + * management. This section provides a brief introduction to the + * hardware components and the gxio_mpipe_ calls used to manage them; + * see the IO Device Guide for a much more detailed description of the + * mPIPE's capabilities. + * + * When a packet arrives at one of the mPIPE's Ethernet MACs, it is + * assigned a channel number indicating which MAC received it. It + * then proceeds through the following hardware pipeline: + * + * @subsection mpipe__classification Classification + * + * A set of classification processors run header parsing code on each + * incoming packet, extracting information including the destination + * MAC address, VLAN, Ethernet type, and five-tuple hash. Some of + * this information is then used to choose which buffer stack will be + * used to hold the packet, and which bucket will be used by the load + * balancer to determine which application will receive the packet. + * + * The rules by which the buffer stack and bucket are chosen can be + * configured via the @ref gxio_mpipe_classifier API. A given app can + * specify multiple rules, each one specifying a bucket range, and a + * set of buffer stacks, to be used for packets matching the rule. + * Each rule can optionally specify a restricted set of channels, + * VLANs, and/or dMACs, in which it is interested. By default, a + * given rule starts out matching all channels associated with the + * mPIPE context's set of open links; all VLANs; and all dMACs. + * Subsequent restrictions can then be added. + * + * @subsection mpipe__load_balancing Load Balancing + * + * The mPIPE load balancer is responsible for choosing the NotifRing + * to which the packet will be delivered. This decision is based on + * the bucket number indicated by the classification program. In + * general, the bucket number is based on some number of low bits of + * the packet's flow hash (applications that aren't interested in flow + * hashing use a single bucket). Each load balancer bucket keeps a + * record of the NotifRing to which packets directed to that bucket + * are currently being delivered. Based on the bucket's load + * balancing mode (@ref gxio_mpipe_bucket_mode_t), the load balancer + * either forwards the packet to the previously assigned NotifRing or + * decides to choose a new NotifRing. If a new NotifRing is required, + * the load balancer chooses the least loaded ring in the NotifGroup + * associated with the bucket. + * + * The load balancer is a shared resource. Each application needs to + * explicitly allocate NotifRings, NotifGroups, and buckets, using + * gxio_mpipe_alloc_notif_rings(), gxio_mpipe_alloc_notif_groups(), + * and gxio_mpipe_alloc_buckets(). Then the application needs to + * configure them using gxio_mpipe_init_notif_ring() and + * gxio_mpipe_init_notif_group_and_buckets(). + * + * @subsection mpipe__buffers Buffer Selection and Packet Delivery + * + * Once the load balancer has chosen the destination NotifRing, the + * mPIPE DMA engine pops at least one buffer off of the 'buffer stack' + * chosen by the classification program and DMAs the packet data into + * that buffer. Each buffer stack provides a hardware-accelerated + * stack of data buffers with the same size. If the packet data is + * larger than the buffers provided by the chosen buffer stack, the + * mPIPE hardware pops off multiple buffers and chains the packet data + * through a multi-buffer linked list. Once the packet data is + * delivered to the buffer(s), the mPIPE hardware writes the + * ::gxio_mpipe_idesc_t metadata object (calculated by the classifier) + * into the NotifRing and increments the number of packets delivered + * to that ring. + * + * Applications can push buffers onto a buffer stack by calling + * gxio_mpipe_push_buffer() or by egressing a packet with the + * ::gxio_mpipe_edesc_t::hwb bit set, indicating that the egressed + * buffers should be returned to the stack. + * + * Applications can allocate and initialize buffer stacks with the + * gxio_mpipe_alloc_buffer_stacks() and gxio_mpipe_init_buffer_stack() + * APIs. + * + * The application must also register the memory pages that will hold + * packets. This requires calling gxio_mpipe_register_page() for each + * memory page that will hold packets allocated by the application for + * a given buffer stack. Since each buffer stack is limited to 16 + * registered pages, it may be necessary to use huge pages, or even + * extremely huge pages, to hold all the buffers. + * + * @subsection mpipe__iqueue NotifRings + * + * Each NotifRing is a region of shared memory, allocated by the + * application, to which the mPIPE delivers packet descriptors + * (::gxio_mpipe_idesc_t). The application can allocate them via + * gxio_mpipe_alloc_notif_rings(). The application can then either + * explicitly initialize them with gxio_mpipe_init_notif_ring() and + * then read from them manually, or can make use of the convenience + * wrappers provided by @ref gxio_mpipe_wrappers. + * + * @section mpipe__egress mPIPE Egress Hardware + * + * Applications use eDMA rings to queue packets for egress. The + * application can allocate them via gxio_mpipe_alloc_edma_rings(). + * The application can then either explicitly initialize them with + * gxio_mpipe_init_edma_ring() and then write to them manually, or + * can make use of the convenience wrappers provided by + * @ref gxio_mpipe_wrappers. + * + * @section gxio__shortcomings Plans for Future API Revisions + * + * The API defined here is only an initial version of the mPIPE API. + * Future plans include: + * + * - Higher level wrapper functions to provide common initialization + * patterns. This should help users start writing mPIPE programs + * without having to learn the details of the hardware. + * + * - Support for reset and deallocation of resources, including + * cleanup upon application shutdown. + * + * - Support for calling these APIs in the BME. + * + * - Support for IO interrupts. + * + * - Clearer definitions of thread safety guarantees. + * + * @section gxio__mpipe_examples Examples + * + * See the following mPIPE example programs for more information about + * allocating mPIPE resources and using them in real applications: + * + * - @ref mpipe/ingress/app.c : Receiving packets. + * + * - @ref mpipe/forward/app.c : Forwarding packets. + * + * Note that there are several more examples. + */ + +/* Flags that can be passed to resource allocation functions. */ +enum gxio_mpipe_alloc_flags_e { + /* Require an allocation to start at a specified resource index. */ + GXIO_MPIPE_ALLOC_FIXED = HV_MPIPE_ALLOC_FIXED, +}; + +/* Flags that can be passed to memory registration functions. */ +enum gxio_mpipe_mem_flags_e { + /* Do not fill L3 when writing, and invalidate lines upon egress. */ + GXIO_MPIPE_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT, + + /* L3 cache fills should only populate IO cache ways. */ + GXIO_MPIPE_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN, +}; + +/* An ingress packet descriptor. When a packet arrives, the mPIPE + * hardware generates this structure and writes it into a NotifRing. + */ +typedef MPIPE_PDESC_t gxio_mpipe_idesc_t; + +/* An egress command descriptor. Applications write this structure + * into eDMA rings and the hardware performs the indicated operation + * (normally involving egressing some bytes). Note that egressing a + * single packet may involve multiple egress command descriptors. + */ +typedef MPIPE_EDMA_DESC_t gxio_mpipe_edesc_t; + +/* + * Max # of mpipe instances. 2 currently. + */ +#define GXIO_MPIPE_INSTANCE_MAX HV_MPIPE_INSTANCE_MAX + +#define NR_MPIPE_MAX GXIO_MPIPE_INSTANCE_MAX + +/* Get the "va" field from an "idesc". + * + * This is the address at which the ingress hardware copied the first + * byte of the packet. + * + * If the classifier detected a custom header, then this will point to + * the custom header, and gxio_mpipe_idesc_get_l2_start() will point + * to the actual L2 header. + * + * Note that this value may be misleading if "idesc->be" is set. + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned char *gxio_mpipe_idesc_get_va(gxio_mpipe_idesc_t *idesc) +{ + return (unsigned char *)(long)idesc->va; +} + +/* Get the "xfer_size" from an "idesc". + * + * This is the actual number of packet bytes transferred into memory + * by the hardware. + * + * Note that this value may be misleading if "idesc->be" is set. + * + * @param idesc An ingress packet descriptor. + * + * ISSUE: Is this the best name for this? + * FIXME: Add more docs about chaining, clipping, etc. + */ +static inline unsigned int gxio_mpipe_idesc_get_xfer_size(gxio_mpipe_idesc_t + *idesc) +{ + return idesc->l2_size; +} + +/* Get the "l2_offset" from an "idesc". + * + * Extremely customized classifiers might not support this function. + * + * This is the number of bytes between the "va" and the L2 header. + * + * The L2 header consists of a destination mac address, a source mac + * address, and an initial ethertype. Various initial ethertypes + * allow encoding extra information in the L2 header, often including + * a vlan, and/or a new ethertype. + * + * Note that the "l2_offset" will be non-zero if (and only if) the + * classifier processed a custom header for the packet. + * + * @param idesc An ingress packet descriptor. + */ +static inline uint8_t gxio_mpipe_idesc_get_l2_offset(gxio_mpipe_idesc_t *idesc) +{ + return (idesc->custom1 >> 32) & 0xFF; +} + +/* Get the "l2_start" from an "idesc". + * + * This is simply gxio_mpipe_idesc_get_va() plus + * gxio_mpipe_idesc_get_l2_offset(). + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned char *gxio_mpipe_idesc_get_l2_start(gxio_mpipe_idesc_t + *idesc) +{ + unsigned char *va = gxio_mpipe_idesc_get_va(idesc); + return va + gxio_mpipe_idesc_get_l2_offset(idesc); +} + +/* Get the "l2_length" from an "idesc". + * + * This is simply gxio_mpipe_idesc_get_xfer_size() minus + * gxio_mpipe_idesc_get_l2_offset(). + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned int gxio_mpipe_idesc_get_l2_length(gxio_mpipe_idesc_t + *idesc) +{ + unsigned int xfer_size = idesc->l2_size; + return xfer_size - gxio_mpipe_idesc_get_l2_offset(idesc); +} + +/* A context object used to manage mPIPE hardware resources. */ +typedef struct { + + /* File descriptor for calling up to Linux (and thus the HV). */ + int fd; + + /* Corresponding mpipe instance #. */ + int instance; + + /* The VA at which configuration registers are mapped. */ + char *mmio_cfg_base; + + /* The VA at which IDMA, EDMA, and buffer manager are mapped. */ + char *mmio_fast_base; + + /* The "initialized" buffer stacks. */ + gxio_mpipe_rules_stacks_t __stacks; + +} gxio_mpipe_context_t; + +/* This is only used internally, but it's most easily made visible here. */ +typedef gxio_mpipe_context_t gxio_mpipe_info_context_t; + +/* Initialize an mPIPE context. + * + * This function allocates an mPIPE "service domain" and maps the MMIO + * registers into the caller's VA space. + * + * @param context Context object to be initialized. + * @param mpipe_instance Instance number of mPIPE shim to be controlled via + * context. + */ +extern int gxio_mpipe_init(gxio_mpipe_context_t *context, + unsigned int mpipe_instance); + +/* Destroy an mPIPE context. + * + * This function frees the mPIPE "service domain" and unmaps the MMIO + * registers from the caller's VA space. + * + * If a user process exits without calling this routine, the kernel + * will destroy the mPIPE context as part of process teardown. + * + * @param context Context object to be destroyed. + */ +extern int gxio_mpipe_destroy(gxio_mpipe_context_t *context); + +/***************************************************************** + * Buffer Stacks * + ******************************************************************/ + +/* Allocate a set of buffer stacks. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of stacks required. + * @param first Index of first stack if ::GXIO_MPIPE_ALLOC_FIXED flag is set, + * otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer stack, or + * ::GXIO_MPIPE_ERR_NO_BUFFER_STACK if allocation failed. + */ +extern int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, + unsigned int count, + unsigned int first, + unsigned int flags); + +/* Enum codes for buffer sizes supported by mPIPE. */ +typedef enum { + /* 128 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_128 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128, + /* 256 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_256 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256, + /* 512 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_512 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512, + /* 1024 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_1024 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024, + /* 1664 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_1664 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664, + /* 4096 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_4096 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096, + /* 10368 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_10368 = + MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368, + /* 16384 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_16384 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384 +} gxio_mpipe_buffer_size_enum_t; + +/* Convert a buffer size in bytes into a buffer size enum. */ +extern gxio_mpipe_buffer_size_enum_t +gxio_mpipe_buffer_size_to_buffer_size_enum(size_t size); + +/* Convert a buffer size enum into a buffer size in bytes. */ +extern size_t +gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t + buffer_size_enum); + +/* Calculate the number of bytes required to store a given number of + * buffers in the memory registered with a buffer stack via + * gxio_mpipe_init_buffer_stack(). + */ +extern size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers); + +/* Initialize a buffer stack. This function binds a region of memory + * to be used by the hardware for storing buffer addresses pushed via + * gxio_mpipe_push_buffer() or as the result of sending a buffer out + * the egress with the 'push to stack when done' bit set. Once this + * function returns, the memory region's contents may be arbitrarily + * modified by the hardware at any time and software should not access + * the memory region again. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @param buffer_size_enum The size of each buffer in the buffer stack, + * as an enum. + * @param mem The address of the buffer stack. This memory must be + * physically contiguous and aligned to a 64kB boundary. + * @param mem_size The size of the buffer stack, in bytes. + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * @return Zero on success, ::GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE if + * buffer_size_enum is invalid, ::GXIO_MPIPE_ERR_BAD_BUFFER_STACK if + * stack has not been allocated. + */ +extern int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context, + unsigned int stack, + gxio_mpipe_buffer_size_enum_t + buffer_size_enum, void *mem, + size_t mem_size, + unsigned int mem_flags); + +/* Push a buffer onto a previously initialized buffer stack. + * + * The size of the buffer being pushed must match the size that was + * registered with gxio_mpipe_init_buffer_stack(). All packet buffer + * addresses are 128-byte aligned; the low 7 bits of the specified + * buffer address will be ignored. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @param buffer The buffer (the low seven bits are ignored). + */ +static inline void gxio_mpipe_push_buffer(gxio_mpipe_context_t *context, + unsigned int stack, void *buffer) +{ + MPIPE_BSM_REGION_ADDR_t offset = { {0} }; + MPIPE_BSM_REGION_VAL_t val = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_BSM - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.stack = stack; + +#if __SIZEOF_POINTER__ == 4 + val.va = ((ulong) buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT; +#else + val.va = ((long)buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT; +#endif + + __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word); +} + +/* Pop a buffer off of a previously initialized buffer stack. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @return The buffer, or NULL if the stack is empty. + */ +static inline void *gxio_mpipe_pop_buffer(gxio_mpipe_context_t *context, + unsigned int stack) +{ + MPIPE_BSM_REGION_ADDR_t offset = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_BSM - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.stack = stack; + + while (1) { + /* + * Case 1: val.c == ..._UNCHAINED, va is non-zero. + * Case 2: val.c == ..._INVALID, va is zero. + * Case 3: val.c == ..._NOT_RDY, va is zero. + */ + MPIPE_BSM_REGION_VAL_t val; + val.word = + __gxio_mmio_read(context->mmio_fast_base + + offset.word); + + /* + * Handle case 1 and 2 by returning the buffer (or NULL). + * Handle case 3 by waiting for the prefetch buffer to refill. + */ + if (val.c != MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY) + return (void *)((unsigned long)val. + va << MPIPE_BSM_REGION_VAL__VA_SHIFT); + } +} + +/***************************************************************** + * NotifRings * + ******************************************************************/ + +/* Allocate a set of NotifRings. + * + * The return value is NOT interesting if count is zero. + * + * Note that NotifRings are allocated in chunks, so allocating one at + * a time is much less efficient than allocating several at once. + * + * @param context An initialized mPIPE context. + * @param count Number of NotifRings required. + * @param first Index of first NotifRing if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer NotifRing, or + * ::GXIO_MPIPE_ERR_NO_NOTIF_RING if allocation failed. + */ +extern int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* Initialize a NotifRing, using the given memory and size. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_idesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 128, 512, + * 2048, or 65536 * sizeof(gxio_mpipe_idesc_t). + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_NOTIF_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Configure an interrupt to be sent to a tile on incoming NotifRing + * traffic. Once an interrupt is sent for a particular ring, no more + * will be sent until gxio_mica_enable_notif_ring_interrupt() is called. + * + * @param context An initialized mPIPE context. + * @param x X coordinate of interrupt target tile. + * @param y Y coordinate of interrupt target tile. + * @param i Index of the IPI register which will receive the interrupt. + * @param e Specific event which will be set in the target IPI register when + * the interrupt occurs. + * @param ring The NotifRing index. + * @return Zero on success, GXIO_ERR_INVAL if params are out of range. + */ +extern int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t + *context, int x, int y, + int i, int e, + unsigned int ring); + +/* Enable an interrupt on incoming NotifRing traffic. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index. + * @return Zero on success, GXIO_ERR_INVAL if params are out of range. + */ +extern int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t + *context, unsigned int ring); + +/* Map all of a client's memory via the given IOTLB. + * @param context An initialized mPIPE context. + * @param iotlb IOTLB index. + * @param pte Page table entry. + * @param flags Flags. + * @return Zero on success, or a negative error code. + */ +extern int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, + unsigned int iotlb, HV_PTE pte, + unsigned int flags); + +/***************************************************************** + * Notif Groups * + ******************************************************************/ + +/* Allocate a set of NotifGroups. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of NotifGroups required. + * @param first Index of first NotifGroup if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer NotifGroup, or + * ::GXIO_MPIPE_ERR_NO_NOTIF_GROUP if allocation failed. + */ +extern int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, + unsigned int count, + unsigned int first, + unsigned int flags); + +/* Add a NotifRing to a NotifGroup. This only sets a bit in the + * application's 'group' object; the hardware NotifGroup can be + * initialized by passing 'group' to gxio_mpipe_init_notif_group() or + * gxio_mpipe_init_notif_group_and_buckets(). + */ +static inline void +gxio_mpipe_notif_group_add_ring(gxio_mpipe_notif_group_bits_t *bits, int ring) +{ + bits->ring_mask[ring / 64] |= (1ull << (ring % 64)); +} + +/* Set a particular NotifGroup bitmask. Since the load balancer + * makes decisions based on both bucket and NotifGroup state, most + * applications should use gxio_mpipe_init_notif_group_and_buckets() + * rather than using this function to configure just a NotifGroup. + */ +extern int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, + unsigned int group, + gxio_mpipe_notif_group_bits_t bits); + +/***************************************************************** + * Load Balancer * + ******************************************************************/ + +/* Allocate a set of load balancer buckets. + * + * The return value is NOT interesting if count is zero. + * + * Note that buckets are allocated in chunks, so allocating one at + * a time is much less efficient than allocating several at once. + * + * Note that the buckets are actually divided into two sub-ranges, of + * different sizes, and different chunk sizes, and the range you get + * by default is determined by the size of the request. Allocations + * cannot span the two sub-ranges. + * + * @param context An initialized mPIPE context. + * @param count Number of buckets required. + * @param first Index of first bucket if ::GXIO_MPIPE_ALLOC_FIXED flag is set, + * otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer bucket, or + * ::GXIO_MPIPE_ERR_NO_BUCKET if allocation failed. + */ +extern int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* The legal modes for gxio_mpipe_bucket_info_t and + * gxio_mpipe_init_notif_group_and_buckets(). + * + * All modes except ::GXIO_MPIPE_BUCKET_ROUND_ROBIN expect that the user + * will allocate a power-of-two number of buckets and initialize them + * to the same mode. The classifier program then uses the appropriate + * number of low bits from the incoming packet's flow hash to choose a + * load balancer bucket. Based on that bucket's load balancing mode, + * reference count, and currently active NotifRing, the load balancer + * chooses the NotifRing to which the packet will be delivered. + */ +typedef enum { + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, in which case packets will be dropped. If + * the bucket reference count ever reaches zero, a new NotifRing may + * be chosen. + */ + GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA, + + /* All packets for a bucket always go to the same NotifRing. + */ + GXIO_MPIPE_BUCKET_STATIC_FLOW_AFFINITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED, + + /* All packets for a bucket go to the least full NotifRing in the + * group, providing load balancing round robin behavior. + */ + GXIO_MPIPE_BUCKET_ROUND_ROBIN = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK, + + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, at which point the bucket starts using the + * least full NotifRing in the group. If all NotifRings in the + * group are full, packets will be dropped. + */ + GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY, + + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, or a random timer fires, at which point the + * bucket starts using the least full NotifRing in the group. If + * all NotifRings in the group are full, packets will be dropped. + * WARNING: This mode is BROKEN on chips with fewer than 64 tiles. + */ + GXIO_MPIPE_BUCKET_PREFER_FLOW_LOCALITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND, + +} gxio_mpipe_bucket_mode_t; + +/* Copy a set of bucket initialization values into the mPIPE + * hardware. Since the load balancer makes decisions based on both + * bucket and NotifGroup state, most applications should use + * gxio_mpipe_init_notif_group_and_buckets() rather than using this + * function to configure a single bucket. + * + * @param context An initialized mPIPE context. + * @param bucket Bucket index to be initialized. + * @param bucket_info Initial reference count, NotifRing index, and mode. + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET on failure. + */ +extern int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, + unsigned int bucket, + gxio_mpipe_bucket_info_t bucket_info); + +/* Initializes a group and range of buckets and range of rings such + * that the load balancer runs a particular load balancing function. + * + * First, the group is initialized with the given rings. + * + * Second, each bucket is initialized with the mode and group, and a + * ring chosen round-robin from the given rings. + * + * Normally, the classifier picks a bucket, and then the load balancer + * picks a ring, based on the bucket's mode, group, and current ring, + * possibly updating the bucket's ring. + * + * @param context An initialized mPIPE context. + * @param group The group. + * @param ring The first ring. + * @param num_rings The number of rings. + * @param bucket The first bucket. + * @param num_buckets The number of buckets. + * @param mode The load balancing mode. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET, + * ::GXIO_MPIPE_ERR_BAD_NOTIF_GROUP, or + * ::GXIO_MPIPE_ERR_BAD_NOTIF_RING on failure. + */ +extern int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t + *context, + unsigned int group, + unsigned int ring, + unsigned int num_rings, + unsigned int bucket, + unsigned int num_buckets, + gxio_mpipe_bucket_mode_t + mode); + +/* Return credits to a NotifRing and/or bucket. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index, or -1. + * @param bucket The bucket, or -1. + * @param count The number of credits to return. + */ +static inline void gxio_mpipe_credit(gxio_mpipe_context_t *context, + int ring, int bucket, unsigned int count) +{ + /* NOTE: Fancy struct initialization would break "C89" header test. */ + + MPIPE_IDMA_RELEASE_REGION_ADDR_t offset = { {0} }; + MPIPE_IDMA_RELEASE_REGION_VAL_t val = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_IDMA - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.ring = ring; + offset.bucket = bucket; + offset.ring_enable = (ring >= 0); + offset.bucket_enable = (bucket >= 0); + val.count = count; + + __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word); +} + +/***************************************************************** + * Egress Rings * + ******************************************************************/ + +/* Allocate a set of eDMA rings. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of eDMA rings required. + * @param first Index of first eDMA ring if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer eDMA ring, or + * ::GXIO_MPIPE_ERR_NO_EDMA_RING if allocation failed. + */ +extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* Initialize an eDMA ring, using the given memory and size. + * + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param channel The channel to use. This must be one of the channels + * associated with the context's set of open links. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_edesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 512, 2048, + * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)). + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context, + unsigned int ering, unsigned int channel, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Set the "max_blks", "min_snf_blks", and "db" fields of + * ::MPIPE_EDMA_RG_INIT_DAT_THRESH_t for a given edma ring. + * + * The global pool of dynamic blocks will be automatically adjusted. + * + * This function should not be called after any egress has been done + * on the edma ring. + * + * Most applications should just use gxio_mpipe_equeue_set_snf_size(). + * + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param max_blks The number of blocks to dedicate to the ring + * (normally min_snf_blks + 1). Must be greater than min_snf_blocks. + * @param min_snf_blks The number of blocks which must be stored + * prior to starting to send the packet (normally 12). + * @param db Whether to allow use of dynamic blocks by the ring + * (normally 1). + * + * @return 0 on success, negative on error. + */ +extern int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, + unsigned int ering, + unsigned int max_blks, + unsigned int min_snf_blks, + unsigned int db); + +/***************************************************************** + * Classifier Program * + ******************************************************************/ + +/* + * + * Functions for loading or configuring the mPIPE classifier program. + * + * The mPIPE classification processors all run a special "classifier" + * program which, for each incoming packet, parses the packet headers, + * encodes some packet metadata in the "idesc", and either drops the + * packet, or picks a notif ring to handle the packet, and a buffer + * stack to contain the packet, usually based on the channel, VLAN, + * dMAC, flow hash, and packet size, under the guidance of the "rules" + * API described below. + * + * @section gxio_mpipe_classifier_default Default Classifier + * + * The MDE provides a simple "default" classifier program. It is + * shipped as source in "$TILERA_ROOT/src/sys/mpipe/classifier.c", + * which serves as its official documentation. It is shipped as a + * binary program in "$TILERA_ROOT/tile/boot/classifier", which is + * automatically included in bootroms created by "tile-monitor", and + * is automatically loaded by the hypervisor at boot time. + * + * The L2 analysis handles LLC packets, SNAP packets, and "VLAN + * wrappers" (keeping the outer VLAN). + * + * The L3 analysis handles IPv4 and IPv6, dropping packets with bad + * IPv4 header checksums, requesting computation of a TCP/UDP checksum + * if appropriate, and hashing the dest and src IP addresses, plus the + * ports for TCP/UDP packets, into the flow hash. No special analysis + * is done for "fragmented" packets or "tunneling" protocols. Thus, + * the first fragment of a fragmented TCP/UDP packet is hashed using + * src/dest IP address and ports and all subsequent fragments are only + * hashed according to src/dest IP address. + * + * The L3 analysis handles other packets too, hashing the dMAC + * smac into a flow hash. + * + * The channel, VLAN, and dMAC used to pick a "rule" (see the + * "rules" APIs below), which in turn is used to pick a buffer stack + * (based on the packet size) and a bucket (based on the flow hash). + * + * To receive traffic matching a particular (channel/VLAN/dMAC + * pattern, an application should allocate its own buffer stacks and + * load balancer buckets, and map traffic to those stacks and buckets, + * as decribed by the "rules" API below. + * + * Various packet metadata is encoded in the idesc. The flow hash is + * four bytes at 0x0C. The VLAN is two bytes at 0x10. The ethtype is + * two bytes at 0x12. The l3 start is one byte at 0x14. The l4 start + * is one byte at 0x15 for IPv4 and IPv6 packets, and otherwise zero. + * The protocol is one byte at 0x16 for IPv4 and IPv6 packets, and + * otherwise zero. + * + * @section gxio_mpipe_classifier_custom Custom Classifiers. + * + * A custom classifier may be created using "tile-mpipe-cc" with a + * customized version of the default classifier sources. + * + * The custom classifier may be included in bootroms using the + * "--classifier" option to "tile-monitor", or loaded dynamically + * using gxio_mpipe_classifier_load_from_file(). + * + * Be aware that "extreme" customizations may break the assumptions of + * the "rules" APIs described below, but simple customizations, such + * as adding new packet metadata, should be fine. + */ + +/* A set of classifier rules, plus a context. */ +typedef struct { + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The actual rules. */ + gxio_mpipe_rules_list_t list; + +} gxio_mpipe_rules_t; + +/* Initialize a classifier program rules list. + * + * This function can be called on a previously initialized rules list + * to discard any previously added rules. + * + * @param rules Rules list to initialize. + * @param context An initialized mPIPE context. + */ +extern void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules, + gxio_mpipe_context_t *context); + +/* Begin a new rule on the indicated rules list. + * + * Note that an empty rule matches all packets, but an empty rule list + * matches no packets. + * + * @param rules Rules list to which new rule is appended. + * @param bucket First load balancer bucket to which packets will be + * delivered. + * @param num_buckets Number of buckets (must be a power of two) across + * which packets will be distributed based on the "flow hash". + * @param stacks Either NULL, to assign each packet to the smallest + * initialized buffer stack which does not induce chaining (and to + * drop packets which exceed the largest initialized buffer stack + * buffer size), or an array, with each entry indicating which buffer + * stack should be used for packets up to that size (with 255 + * indicating that those packets should be dropped). + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules, + unsigned int bucket, + unsigned int num_buckets, + gxio_mpipe_rules_stacks_t *stacks); + +/* Set the headroom of the current rule. + * + * @param rules Rules list whose current rule will be modified. + * @param headroom The headroom. + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules, + uint8_t headroom); + +/* Indicate that packets from a particular channel can be delivered + * to the buckets and buffer stacks associated with the current rule. + * + * Channels added must be associated with links opened by the mPIPE context + * used in gxio_mpipe_rules_init(). A rule with no channels is equivalent + * to a rule naming all such associated channels. + * + * @param rules Rules list whose current rule will be modified. + * @param channel The channel to add. + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules, + unsigned int channel); + +/* Commit rules. + * + * The rules are sent to the hypervisor, where they are combined with + * the rules from other apps, and used to program the hardware classifier. + * + * Note that if this function returns an error, then the rules will NOT + * have been committed, even if the error is due to interactions with + * rules from another app. + * + * @param rules Rules list to commit. + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules); + +/***************************************************************** + * Ingress Queue Wrapper * + ******************************************************************/ + +/* + * + * Convenience functions for receiving packets from a NotifRing and + * sending packets via an eDMA ring. + * + * The mpipe ingress and egress hardware uses shared memory packet + * descriptors to describe packets that have arrived on ingress or + * are destined for egress. These descriptors are stored in shared + * memory ring buffers and written or read by hardware as necessary. + * The gxio library provides wrapper functions that manage the head and + * tail pointers for these rings, allowing the user to easily read or + * write packet descriptors. + * + * The initialization interface for ingress and egress rings is quite + * similar. For example, to create an ingress queue, the user passes + * a ::gxio_mpipe_iqueue_t state object, a ring number from + * gxio_mpipe_alloc_notif_rings(), and the address of memory to hold a + * ring buffer to the gxio_mpipe_iqueue_init() function. The function + * returns success when the state object has been initialized and the + * hardware configured to deliver packets to the specified ring + * buffer. Similarly, gxio_mpipe_equeue_init() takes a + * ::gxio_mpipe_equeue_t state object, a ring number from + * gxio_mpipe_alloc_edma_rings(), and a shared memory buffer. + * + * @section gxio_mpipe_iqueue Working with Ingress Queues + * + * Once initialized, the gxio_mpipe_iqueue_t API provides two flows + * for getting the ::gxio_mpipe_idesc_t packet descriptor associated + * with incoming packets. The simplest is to call + * gxio_mpipe_iqueue_get() or gxio_mpipe_iqueue_try_get(). These + * functions copy the oldest packet descriptor out of the NotifRing and + * into a descriptor provided by the caller. They also immediately + * inform the hardware that a descriptor has been processed. + * + * For applications with stringent performance requirements, higher + * efficiency can be achieved by avoiding the packet descriptor copy + * and processing multiple descriptors at once. The + * gxio_mpipe_iqueue_peek() and gxio_mpipe_iqueue_try_peek() functions + * allow such optimizations. These functions provide a pointer to the + * next valid ingress descriptor in the NotifRing's shared memory ring + * buffer, and a count of how many contiguous descriptors are ready to + * be processed. The application can then process any number of those + * descriptors in place, calling gxio_mpipe_iqueue_consume() to inform + * the hardware after each one has been processed. + * + * @section gxio_mpipe_equeue Working with Egress Queues + * + * Similarly, the egress queue API provides a high-performance + * interface plus a simple wrapper for use in posting + * ::gxio_mpipe_edesc_t egress packet descriptors. The simple + * version, gxio_mpipe_equeue_put(), allows the programmer to wait for + * an eDMA ring slot to become available and write a single descriptor + * into the ring. + * + * Alternatively, you can reserve slots in the eDMA ring using + * gxio_mpipe_equeue_reserve() or gxio_mpipe_equeue_try_reserve(), and + * then fill in each slot using gxio_mpipe_equeue_put_at(). This + * capability can be used to amortize the cost of reserving slots + * across several packets. It also allows gather operations to be + * performed on a shared equeue, by ensuring that the edescs for all + * the fragments are all contiguous in the eDMA ring. + * + * The gxio_mpipe_equeue_reserve() and gxio_mpipe_equeue_try_reserve() + * functions return a 63-bit "completion slot", which is actually a + * sequence number, the low bits of which indicate the ring buffer + * index and the high bits the number of times the application has + * gone around the egress ring buffer. The extra bits allow an + * application to check for egress completion by calling + * gxio_mpipe_equeue_is_complete() to see whether a particular 'slot' + * number has finished. Given the maximum packet rates of the Gx + * processor, the 63-bit slot number will never wrap. + * + * In practice, most applications use the ::gxio_mpipe_edesc_t::hwb + * bit to indicate that the buffers containing egress packet data + * should be pushed onto a buffer stack when egress is complete. Such + * applications generally do not need to know when an egress operation + * completes (since there is no need to free a buffer post-egress), + * and thus can use the optimized gxio_mpipe_equeue_reserve_fast() or + * gxio_mpipe_equeue_try_reserve_fast() functions, which return a 24 + * bit "slot", instead of a 63-bit "completion slot". + * + * Once a slot has been "reserved", it MUST be filled. If the + * application reserves a slot and then decides that it does not + * actually need it, it can set the ::gxio_mpipe_edesc_t::ns (no send) + * bit on the descriptor passed to gxio_mpipe_equeue_put_at() to + * indicate that no data should be sent. This technique can also be + * used to drop an incoming packet, instead of forwarding it, since + * any buffer will still be pushed onto the buffer stack when the + * egress descriptor is processed. + */ + +/* A convenient interface to a NotifRing, for use by a single thread. + */ +typedef struct { + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The actual NotifRing. */ + gxio_mpipe_idesc_t *idescs; + + /* The number of entries. */ + unsigned long num_entries; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned long log2_num_entries; + + /* The next entry. */ + unsigned int head; + + /* The NotifRing id. */ + unsigned int ring; + +#ifdef __BIG_ENDIAN__ + /* The number of byteswapped entries. */ + unsigned int swapped; +#endif + +} gxio_mpipe_iqueue_t; + +/* Initialize an "iqueue". + * + * Takes the iqueue plus the same args as gxio_mpipe_init_notif_ring(). + */ +extern int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Advance over some old entries in an iqueue. + * + * Please see the documentation for gxio_mpipe_iqueue_consume(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param count The number of entries to advance over. + */ +static inline void gxio_mpipe_iqueue_advance(gxio_mpipe_iqueue_t *iqueue, + int count) +{ + /* Advance with proper wrap. */ + int head = iqueue->head + count; + iqueue->head = + (head & iqueue->mask_num_entries) + + (head >> iqueue->log2_num_entries); + +#ifdef __BIG_ENDIAN__ + /* HACK: Track swapped entries. */ + iqueue->swapped -= count; +#endif +} + +/* Release the ring and bucket for an old entry in an iqueue. + * + * Releasing the ring allows more packets to be delivered to the ring. + * + * Releasing the bucket allows flows using the bucket to be moved to a + * new ring when using GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY. + * + * This function is shorthand for "gxio_mpipe_credit(iqueue->context, + * iqueue->ring, idesc->bucket_id, 1)", and it may be more convenient + * to make that underlying call, using those values, instead of + * tracking the entire "idesc". + * + * If packet processing is deferred, optimal performance requires that + * the releasing be deferred as well. + * + * Please see the documentation for gxio_mpipe_iqueue_consume(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc The descriptor which was processed. + */ +static inline void gxio_mpipe_iqueue_release(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + gxio_mpipe_credit(iqueue->context, iqueue->ring, idesc->bucket_id, 1); +} + +/* Consume a packet from an "iqueue". + * + * After processing packets peeked at via gxio_mpipe_iqueue_peek() + * or gxio_mpipe_iqueue_try_peek(), you must call this function, or + * gxio_mpipe_iqueue_advance() plus gxio_mpipe_iqueue_release(), to + * advance over those entries, and release their rings and buckets. + * + * You may call this function as each packet is processed, or you can + * wait until several packets have been processed. + * + * Note that if you are using a single bucket, and you are handling + * batches of N packets, then you can replace several calls to this + * function with calls to "gxio_mpipe_iqueue_advance(iqueue, N)" and + * "gxio_mpipe_credit(iqueue->context, iqueue->ring, bucket, N)". + * + * Note that if your classifier sets "idesc->nr", then you should + * explicitly call "gxio_mpipe_iqueue_advance(iqueue, idesc)" plus + * "gxio_mpipe_credit(iqueue->context, iqueue->ring, -1, 1)", to + * avoid incorrectly crediting the (unused) bucket. + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc The descriptor which was processed. + */ +static inline void gxio_mpipe_iqueue_consume(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + gxio_mpipe_iqueue_advance(iqueue, 1); + gxio_mpipe_iqueue_release(iqueue, idesc); +} + +/* Peek at the next packet(s) in an "iqueue", without waiting. + * + * If no packets are available, fills idesc_ref with NULL, and then + * returns ::GXIO_MPIPE_ERR_IQUEUE_EMPTY. Otherwise, fills idesc_ref + * with the address of the next valid packet descriptor, and returns + * the maximum number of valid descriptors which can be processed. + * You may process fewer descriptors if desired. + * + * Call gxio_mpipe_iqueue_consume() on each packet once it has been + * processed (or dropped), to allow more packets to be delivered. + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc_ref A pointer to a packet descriptor pointer. + * @return The (positive) number of packets which can be processed, + * or ::GXIO_MPIPE_ERR_IQUEUE_EMPTY if no packets are available. + */ +static inline int gxio_mpipe_iqueue_try_peek(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t **idesc_ref) +{ + gxio_mpipe_idesc_t *next; + + uint64_t head = iqueue->head; + uint64_t tail = __gxio_mmio_read(iqueue->idescs); + + /* Available entries. */ + uint64_t avail = + (tail >= head) ? (tail - head) : (iqueue->num_entries - head); + + if (avail == 0) { + *idesc_ref = NULL; + return GXIO_MPIPE_ERR_IQUEUE_EMPTY; + } + + next = &iqueue->idescs[head]; + + /* ISSUE: Is this helpful? */ + __insn_prefetch(next); + +#ifdef __BIG_ENDIAN__ + /* HACK: Swap new entries directly in memory. */ + { + int i, j; + for (i = iqueue->swapped; i < avail; i++) { + for (j = 0; j < 8; j++) + next[i].words[j] = + __builtin_bswap64(next[i].words[j]); + } + iqueue->swapped = avail; + } +#endif + + *idesc_ref = next; + + return avail; +} + +/* Drop a packet by pushing its buffer (if appropriate). + * + * NOTE: The caller must still call gxio_mpipe_iqueue_consume() if idesc + * came from gxio_mpipe_iqueue_try_peek() or gxio_mpipe_iqueue_peek(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc A packet descriptor. + */ +static inline void gxio_mpipe_iqueue_drop(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + /* FIXME: Handle "chaining" properly. */ + + if (!idesc->be) { + unsigned char *va = gxio_mpipe_idesc_get_va(idesc); + gxio_mpipe_push_buffer(iqueue->context, idesc->stack_idx, va); + } +} + +/***************************************************************** + * Egress Queue Wrapper * + ******************************************************************/ + +/* A convenient, thread-safe interface to an eDMA ring. */ +typedef struct { + + /* State object for tracking head and tail pointers. */ + __gxio_dma_queue_t dma_queue; + + /* The ring entries. */ + gxio_mpipe_edesc_t *edescs; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned long log2_num_entries; + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The ering. */ + unsigned int ering; + + /* The channel. */ + unsigned int channel; + +} gxio_mpipe_equeue_t; + +/* Initialize an "equeue". + * + * This function uses gxio_mpipe_init_edma_ring() to initialize the + * underlying edma_ring using the provided arguments. + * + * @param equeue An egress queue to be initialized. + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param channel The channel to use. This must be one of the channels + * associated with the context's set of open links. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_edesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 512, 2048, + * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)). + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_context_t *context, + unsigned int ering, + unsigned int channel, + void *mem, unsigned int mem_size, + unsigned int mem_flags); + +/* Reserve completion slots for edescs. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is slower than gxio_mpipe_equeue_reserve_fast(), but + * returns a full 64 bit completion slot, which can be used with + * gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (must be non-zero). + * @return The first reserved completion slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_reserve(gxio_mpipe_equeue_t *equeue, + unsigned int num) +{ + return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, true); +} + +/* Reserve completion slots for edescs, if possible. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is slower than gxio_mpipe_equeue_try_reserve_fast(), + * but returns a full 64 bit completion slot, which can be used with + * gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (must be non-zero). + * @return The first reserved completion slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_try_reserve(gxio_mpipe_equeue_t + *equeue, unsigned int num) +{ + return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, false); +} + +/* Reserve slots for edescs. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is faster than gxio_mpipe_equeue_reserve(), but + * returns a 24 bit slot (instead of a 64 bit completion slot), which + * thus cannot be used with gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (should be non-zero). + * @return The first reserved slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_reserve_fast(gxio_mpipe_equeue_t + *equeue, unsigned int num) +{ + return __gxio_dma_queue_reserve(&equeue->dma_queue, num, true, false); +} + +/* Reserve slots for edescs, if possible. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is faster than gxio_mpipe_equeue_try_reserve(), but + * returns a 24 bit slot (instead of a 64 bit completion slot), which + * thus cannot be used with gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (should be non-zero). + * @return The first reserved slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_try_reserve_fast(gxio_mpipe_equeue_t + *equeue, + unsigned int num) +{ + return __gxio_dma_queue_reserve(&equeue->dma_queue, num, false, false); +} + +/* + * HACK: This helper function tricks gcc 4.6 into avoiding saving + * a copy of "edesc->words[0]" on the stack for no obvious reason. + */ + +static inline void gxio_mpipe_equeue_put_at_aux(gxio_mpipe_equeue_t *equeue, + uint_reg_t ew[2], + unsigned long slot) +{ + unsigned long edma_slot = slot & equeue->mask_num_entries; + gxio_mpipe_edesc_t *edesc_p = &equeue->edescs[edma_slot]; + + /* + * ISSUE: Could set eDMA ring to be on generation 1 at start, which + * would avoid the negation here, perhaps allowing "__insn_bfins()". + */ + ew[0] |= !((slot >> equeue->log2_num_entries) & 1); + + /* + * NOTE: We use "__gxio_mpipe_write()", plus the fact that the eDMA + * queue alignment restrictions ensure that these two words are on + * the same cacheline, to force proper ordering between the stores. + */ + __gxio_mmio_write64(&edesc_p->words[1], ew[1]); + __gxio_mmio_write64(&edesc_p->words[0], ew[0]); +} + +/* Post an edesc to a given slot in an equeue. + * + * This function copies the supplied edesc into entry "slot mod N" in + * the underlying ring, setting the "gen" bit to the appropriate value + * based on "(slot mod N*2)", where "N" is the size of the ring. Note + * that the higher bits of slot are unused, and thus, this function + * can handle "slots" as well as "completion slots". + * + * Normally this function is used to fill in slots reserved by + * gxio_mpipe_equeue_try_reserve(), gxio_mpipe_equeue_reserve(), + * gxio_mpipe_equeue_try_reserve_fast(), or + * gxio_mpipe_equeue_reserve_fast(), + * + * This function can also be used without "reserving" slots, if the + * application KNOWS that the ring can never overflow, for example, by + * pushing fewer buffers into the buffer stacks than there are total + * slots in the equeue, but this is NOT recommended. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param edesc The egress descriptor to be posted. + * @param slot An egress slot (only the low bits are actually used). + */ +static inline void gxio_mpipe_equeue_put_at(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_edesc_t edesc, + unsigned long slot) +{ + gxio_mpipe_equeue_put_at_aux(equeue, edesc.words, slot); +} + +/* Post an edesc to the next slot in an equeue. + * + * This is a convenience wrapper around + * gxio_mpipe_equeue_reserve_fast() and gxio_mpipe_equeue_put_at(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param edesc The egress descriptor to be posted. + * @return 0 on success. + */ +static inline int gxio_mpipe_equeue_put(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_edesc_t edesc) +{ + int64_t slot = gxio_mpipe_equeue_reserve_fast(equeue, 1); + if (slot < 0) + return (int)slot; + + gxio_mpipe_equeue_put_at(equeue, edesc, slot); + + return 0; +} + +/* Ask the mPIPE hardware to egress outstanding packets immediately. + * + * This call is not necessary, but may slightly reduce overall latency. + * + * Technically, you should flush all gxio_mpipe_equeue_put_at() writes + * to memory before calling this function, to ensure the descriptors + * are visible in memory before the mPIPE hardware actually looks for + * them. But this should be very rare, and the only side effect would + * be increased latency, so it is up to the caller to decide whether + * or not to flush memory. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + */ +static inline void gxio_mpipe_equeue_flush(gxio_mpipe_equeue_t *equeue) +{ + /* Use "ring_idx = 0" and "count = 0" to "wake up" the eDMA ring. */ + MPIPE_EDMA_POST_REGION_VAL_t val = { {0} }; + /* Flush the write buffers. */ + __insn_flushwb(); + __gxio_mmio_write(equeue->dma_queue.post_region_addr, val.word); +} + +/* Determine if a given edesc has been completed. + * + * Note that this function requires a "completion slot", and thus may + * NOT be used with a "slot" from gxio_mpipe_equeue_reserve_fast() or + * gxio_mpipe_equeue_try_reserve_fast(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param completion_slot The completion slot used by the edesc. + * @param update If true, and the desc does not appear to have completed + * yet, then update any software cache of the hardware completion counter, + * and check again. This should normally be true. + * @return True iff the given edesc has been completed. + */ +static inline int gxio_mpipe_equeue_is_complete(gxio_mpipe_equeue_t *equeue, + int64_t completion_slot, + int update) +{ + return __gxio_dma_queue_is_complete(&equeue->dma_queue, + completion_slot, update); +} + +/* Set the snf (store and forward) size for an equeue. + * + * The snf size for an equeue defaults to 1536, and encodes the size + * of the largest packet for which egress is guaranteed to avoid + * transmission underruns and/or corrupt checksums under heavy load. + * + * The snf size affects a global resource pool which cannot support, + * for example, all 24 equeues each requesting an snf size of 8K. + * + * To ensure that jumbo packets can be egressed properly, the snf size + * should be set to the size of the largest possible packet, which + * will usually be limited by the size of the app's largest buffer. + * + * This is a convenience wrapper around + * gxio_mpipe_config_edma_ring_blks(). + * + * This function should not be called after any egress has been done + * on the equeue. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param size The snf size, in bytes. + * @return Zero on success, negative error otherwise. + */ +static inline int gxio_mpipe_equeue_set_snf_size(gxio_mpipe_equeue_t *equeue, + size_t size) +{ + int blks = (size + 127) / 128; + return gxio_mpipe_config_edma_ring_blks(equeue->context, equeue->ering, + blks + 1, blks, 1); +} + +/***************************************************************** + * Link Management * + ******************************************************************/ + +/* + * + * Functions for manipulating and sensing the state and configuration + * of physical network links. + * + * @section gxio_mpipe_link_perm Link Permissions + * + * Opening a link (with gxio_mpipe_link_open()) requests a set of link + * permissions, which control what may be done with the link, and potentially + * what permissions may be granted to other processes. + * + * Data permission allows the process to receive packets from the link by + * specifying the link's channel number in mPIPE packet distribution rules, + * and to send packets to the link by using the link's channel number as + * the target for an eDMA ring. + * + * Stats permission allows the process to retrieve link attributes (such as + * the speeds it is capable of running at, or whether it is currently up), and + * to read and write certain statistics-related registers in the link's MAC. + * + * Control permission allows the process to retrieve and modify link attributes + * (so that it may, for example, bring the link up and take it down), and + * read and write many registers in the link's MAC and PHY. + * + * Any permission may be requested as shared, which allows other processes + * to also request shared permission, or exclusive, which prevents other + * processes from requesting it. In keeping with GXIO's typical usage in + * an embedded environment, the defaults for all permissions are shared. + * + * Permissions are granted on a first-come, first-served basis, so if two + * applications request an exclusive permission on the same link, the one + * to run first will win. Note, however, that some system components, like + * the kernel Ethernet driver, may get an opportunity to open links before + * any applications run. + * + * @section gxio_mpipe_link_names Link Names + * + * Link names are of the form gbe<em>number</em> (for Gigabit Ethernet), + * xgbe<em>number</em> (for 10 Gigabit Ethernet), loop<em>number</em> (for + * internal mPIPE loopback), or ilk<em>number</em>/<em>channel</em> + * (for Interlaken links); for instance, gbe0, xgbe1, loop3, and + * ilk0/12 are all possible link names. The correspondence between + * the link name and an mPIPE instance number or mPIPE channel number is + * system-dependent; all links will not exist on all systems, and the set + * of numbers used for a particular link type may not start at zero and may + * not be contiguous. Use gxio_mpipe_link_enumerate() to retrieve the set of + * links which exist on a system, and always use gxio_mpipe_link_instance() + * to determine which mPIPE controls a particular link. + * + * Note that in some cases, links may share hardware, such as PHYs, or + * internal mPIPE buffers; in these cases, only one of the links may be + * opened at a time. This is especially common with xgbe and gbe ports, + * since each xgbe port uses 4 SERDES lanes, each of which may also be + * configured as one gbe port. + * + * @section gxio_mpipe_link_states Link States + * + * The mPIPE link management model revolves around three different states, + * which are maintained for each link: + * + * 1. The <em>current</em> link state: is the link up now, and if so, at + * what speed? + * + * 2. The <em>desired</em> link state: what do we want the link state to be? + * The system is always working to make this state the current state; + * thus, if the desired state is up, and the link is down, we'll be + * constantly trying to bring it up, automatically. + * + * 3. The <em>possible</em> link state: what speeds are valid for this + * particular link? Or, in other words, what are the capabilities of + * the link hardware? + * + * These link states are not, strictly speaking, related to application + * state; they may be manipulated at any time, whether or not the link + * is currently being used for data transfer. However, for convenience, + * gxio_mpipe_link_open() and gxio_mpipe_link_close() (or application exit) + * can affect the link state. These implicit link management operations + * may be modified or disabled by the use of link open flags. + * + * From an application, you can use gxio_mpipe_link_get_attr() + * and gxio_mpipe_link_set_attr() to manipulate the link states. + * gxio_mpipe_link_get_attr() with ::GXIO_MPIPE_LINK_POSSIBLE_STATE + * gets you the possible link state. gxio_mpipe_link_get_attr() with + * ::GXIO_MPIPE_LINK_CURRENT_STATE gets you the current link state. + * Finally, gxio_mpipe_link_set_attr() and gxio_mpipe_link_get_attr() + * with ::GXIO_MPIPE_LINK_DESIRED_STATE allow you to modify or retrieve + * the desired link state. + * + * If you want to manage a link from a part of your application which isn't + * involved in packet processing, you can use the ::GXIO_MPIPE_LINK_NO_DATA + * flags on a gxio_mpipe_link_open() call. This opens the link, but does + * not request data permission, so it does not conflict with any exclusive + * permissions which may be held by other processes. You can then can use + * gxio_mpipe_link_get_attr() and gxio_mpipe_link_set_attr() on this link + * object to bring up or take down the link. + * + * Some links support link state bits which support various loopback + * modes. ::GXIO_MPIPE_LINK_LOOP_MAC tests datapaths within the Tile + * Processor itself; ::GXIO_MPIPE_LINK_LOOP_PHY tests the datapath between + * the Tile Processor and the external physical layer interface chip; and + * ::GXIO_MPIPE_LINK_LOOP_EXT tests the entire network datapath with the + * aid of an external loopback connector. In addition to enabling hardware + * testing, such configuration can be useful for software testing, as well. + * + * When LOOP_MAC or LOOP_PHY is enabled, packets transmitted on a channel + * will be received by that channel, instead of being emitted on the + * physical link, and packets received on the physical link will be ignored. + * Other than that, all standard GXIO operations work as you might expect. + * Note that loopback operation requires that the link be brought up using + * one or more of the GXIO_MPIPE_LINK_SPEED_xxx link state bits. + * + * Those familiar with previous versions of the MDE on TILEPro hardware + * will notice significant similarities between the NetIO link management + * model and the mPIPE link management model. However, the NetIO model + * was developed in stages, and some of its features -- for instance, + * the default setting of certain flags -- were shaped by the need to be + * compatible with previous versions of NetIO. Since the features provided + * by the mPIPE hardware and the mPIPE GXIO library are significantly + * different than those provided by NetIO, in some cases, we have made + * different choices in the mPIPE link management API. Thus, please read + * this documentation carefully before assuming that mPIPE link management + * operations are exactly equivalent to their NetIO counterparts. + */ + +/* An object used to manage mPIPE link state and resources. */ +typedef struct { + /* The overall mPIPE context. */ + gxio_mpipe_context_t *context; + + /* The channel number used by this link. */ + uint8_t channel; + + /* The MAC index used by this link. */ + uint8_t mac; +} gxio_mpipe_link_t; + +/* Translate a link name to the instance number of the mPIPE shim which is + * connected to that link. This call does not verify whether the link is + * currently available, and does not reserve any link resources; + * gxio_mpipe_link_open() must be called to perform those functions. + * + * Typically applications will call this function to translate a link name + * to an mPIPE instance number; call gxio_mpipe_init(), passing it that + * instance number, to initialize the mPIPE shim; and then call + * gxio_mpipe_link_open(), passing it the same link name plus the mPIPE + * context, to configure the link. + * + * @param link_name Name of the link; see @ref gxio_mpipe_link_names. + * @return The mPIPE instance number which is associated with the named + * link, or a negative error code (::GXIO_ERR_NO_DEVICE) if the link does + * not exist. + */ +extern int gxio_mpipe_link_instance(const char *link_name); + +/* Retrieve one of this system's legal link names, and its MAC address. + * + * @param index Link name index. If a system supports N legal link names, + * then indices between 0 and N - 1, inclusive, each correspond to one of + * those names. Thus, to retrieve all of a system's legal link names, + * call this function in a loop, starting with an index of zero, and + * incrementing it once per iteration until -1 is returned. + * @param link_name Pointer to the buffer which will receive the retrieved + * link name. The buffer should contain space for at least + * ::GXIO_MPIPE_LINK_NAME_LEN bytes; the returned name, including the + * terminating null byte, will be no longer than that. + * @param link_name Pointer to the buffer which will receive the retrieved + * MAC address. The buffer should contain space for at least 6 bytes. + * @return Zero if a link name was successfully retrieved; -1 if one was + * not. + */ +extern int gxio_mpipe_link_enumerate_mac(int index, char *link_name, + uint8_t *mac_addr); + +/* Open an mPIPE link. + * + * A link must be opened before it may be used to send or receive packets, + * and before its state may be examined or changed. Depending up on the + * link's intended use, one or more link permissions may be requested via + * the flags parameter; see @ref gxio_mpipe_link_perm. In addition, flags + * may request that the link's state be modified at open time. See @ref + * gxio_mpipe_link_states and @ref gxio_mpipe_link_open_flags for more detail. + * + * @param link A link state object, which will be initialized if this + * function completes successfully. + * @param context An initialized mPIPE context. + * @param link_name Name of the link. + * @param flags Zero or more @ref gxio_mpipe_link_open_flags, ORed together. + * @return 0 if the link was successfully opened, or a negative error code. + * + */ +extern int gxio_mpipe_link_open(gxio_mpipe_link_t *link, + gxio_mpipe_context_t *context, + const char *link_name, unsigned int flags); + +/* Close an mPIPE link. + * + * Closing a link makes it available for use by other processes. Once + * a link has been closed, packets may no longer be sent on or received + * from the link, and its state may not be examined or changed. + * + * @param link A link state object, which will no longer be initialized + * if this function completes successfully. + * @return 0 if the link was successfully closed, or a negative error code. + * + */ +extern int gxio_mpipe_link_close(gxio_mpipe_link_t *link); + +/* Return a link's channel number. + * + * @param link A properly initialized link state object. + * @return The channel number for the link. + */ +static inline int gxio_mpipe_link_channel(gxio_mpipe_link_t *link) +{ + return link->channel; +} + +/* Set a link attribute. + * + * @param link A properly initialized link state object. + * @param attr An attribute from the set of @ref gxio_mpipe_link_attrs. + * @param val New value of the attribute. + * @return 0 if the attribute was successfully set, or a negative error + * code. + */ +extern int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr, + int64_t val); + +/////////////////////////////////////////////////////////////////// +// Timestamp // +/////////////////////////////////////////////////////////////////// + +/* Get the timestamp of mPIPE when this routine is called. + * + * @param context An initialized mPIPE context. + * @param ts A timespec structure to store the current clock. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context, + struct timespec *ts); + +/* Set the timestamp of mPIPE. + * + * @param context An initialized mPIPE context. + * @param ts A timespec structure to store the requested clock. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context, + const struct timespec *ts); + +/* Adjust the timestamp of mPIPE. + * + * @param context An initialized mPIPE context. + * @param delta A signed time offset to adjust, in nanoseconds. + * The absolute value of this parameter must be less than or + * equal to 1000000000. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, + int64_t delta); + +/** Adjust the mPIPE timestamp clock frequency. + * + * @param context An initialized mPIPE context. + * @param ppb A 32-bit signed PPB (Parts Per Billion) value to adjust. + * The absolute value of ppb must be less than or equal to 1000000000. + * Values less than about 30000 will generally cause a GXIO_ERR_INVAL + * return due to the granularity of the hardware that converts reference + * clock cycles into seconds and nanoseconds. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t* context, + int32_t ppb); + +#endif /* !_GXIO_MPIPE_H_ */ diff --git a/arch/tile/include/gxio/trio.h b/arch/tile/include/gxio/trio.h new file mode 100644 index 00000000000..df10a662cc2 --- /dev/null +++ b/arch/tile/include/gxio/trio.h @@ -0,0 +1,298 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * + * An API for allocating, configuring, and manipulating TRIO hardware + * resources + */ + +/* + * + * The TILE-Gx TRIO shim provides connections to external devices via + * PCIe or other transaction IO standards. The gxio_trio_ API, + * declared in <gxio/trio.h>, allows applications to allocate and + * configure TRIO IO resources like DMA command rings, memory map + * windows, and device interrupts. The following sections introduce + * the various components of the API. We strongly recommend reading + * the TRIO section of the IO Device Guide (UG404) before working with + * this API. + * + * @section trio__ingress TRIO Ingress Hardware Resources + * + * The TRIO ingress hardware is responsible for examining incoming + * PCIe or StreamIO packets and choosing a processing mechanism based + * on the packets' bus address. The gxio_trio_ API can be used to + * configure different handlers for different ranges of bus address + * space. The user can configure "mapped memory" and "scatter queue" + * regions to match incoming packets within 4kB-aligned ranges of bus + * addresses. Each range specifies a different set of mapping + * parameters to be applied when handling the ingress packet. The + * following sections describe how to work with MapMem and scatter + * queue regions. + * + * @subsection trio__mapmem TRIO MapMem Regions + * + * TRIO mapped memory (or MapMem) regions allow the user to map + * incoming read and write requests directly to the application's + * memory space. MapMem regions are allocated via + * gxio_trio_alloc_memory_maps(). Given an integer MapMem number, + * applications can use gxio_trio_init_memory_map() to specify the + * range of bus addresses that will match the region and the range of + * virtual addresses to which those packets will be applied. + * + * As with many other gxio APIs, the programmer must be sure to + * register memory pages that will be used with MapMem regions. Pages + * can be registered with TRIO by allocating an ASID (address space + * identifier) and then using gxio_trio_register_page() to register up to + * 16 pages with the hardware. The initialization functions for + * resources that require registered memory (MapMem, scatter queues, + * push DMA, and pull DMA) then take an 'asid' parameter in order to + * configure which set of registered pages is used by each resource. + * + * @subsection trio__scatter_queue TRIO Scatter Queues + * + * The TRIO shim's scatter queue regions allow users to dynamically + * map buffers from a large address space into a small range of bus + * addresses. This is particularly helpful for PCIe endpoint devices, + * where the host generally limits the size of BARs to tens of + * megabytes. + * + * Each scatter queue consists of a memory map region, a queue of + * tile-side buffer VAs to be mapped to that region, and a bus-mapped + * "doorbell" register that the remote endpoint can write to trigger a + * dequeue of the current buffer VA, thus swapping in a new buffer. + * The VAs pushed onto a scatter queue must be 4kB aligned, so + * applications may need to use higher-level protocols to inform + * remote entities that they should apply some additional, sub-4kB + * offset when reading or writing the scatter queue region. For more + * information, see the IO Device Guide (UG404). + * + * @section trio__egress TRIO Egress Hardware Resources + * + * The TRIO shim supports two mechanisms for egress packet generation: + * programmed IO (PIO) and push/pull DMA. PIO allows applications to + * create MMIO mappings for PCIe or StreamIO address space, such that + * the application can generate word-sized read or write transactions + * by issuing load or store instructions. Push and pull DMA are tuned + * for larger transactions; they use specialized hardware engines to + * transfer large blocks of data at line rate. + * + * @subsection trio__pio TRIO Programmed IO + * + * Programmed IO allows applications to create MMIO mappings for PCIe + * or StreamIO address space. The hardware PIO regions support access + * to PCIe configuration, IO, and memory space, but the gxio_trio API + * only supports memory space accesses. PIO regions are allocated + * with gxio_trio_alloc_pio_regions() and initialized via + * gxio_trio_init_pio_region(). Once a region is bound to a range of + * bus address via the initialization function, the application can + * use gxio_trio_map_pio_region() to create MMIO mappings from its VA + * space onto the range of bus addresses supported by the PIO region. + * + * @subsection trio_dma TRIO Push and Pull DMA + * + * The TRIO push and pull DMA engines allow users to copy blocks of + * data between application memory and the bus. Push DMA generates + * write packets that copy from application memory to the bus and pull + * DMA generates read packets that copy from the bus into application + * memory. The DMA engines are managed via an API that is very + * similar to the mPIPE eDMA interface. For a detailed explanation of + * the eDMA queue API, see @ref gxio_mpipe_wrappers. + * + * Push and pull DMA queues are allocated via + * gxio_trio_alloc_push_dma_ring() / gxio_trio_alloc_pull_dma_ring(). + * Once allocated, users generally use a ::gxio_trio_dma_queue_t + * object to manage the queue, providing easy wrappers for reserving + * command slots in the DMA command ring, filling those slots, and + * waiting for commands to complete. DMA queues can be initialized + * via gxio_trio_init_push_dma_queue() or + * gxio_trio_init_pull_dma_queue(). + * + * See @ref trio/push_dma/app.c for an example of how to use push DMA. + * + * @section trio_shortcomings Plans for Future API Revisions + * + * The simulation framework is incomplete. Future features include: + * + * - Support for reset and deallocation of resources. + * + * - Support for pull DMA. + * + * - Support for interrupt regions and user-space interrupt delivery. + * + * - Support for getting BAR mappings and reserving regions of BAR + * address space. + */ +#ifndef _GXIO_TRIO_H_ +#define _GXIO_TRIO_H_ + +#include <linux/types.h> + +#include <gxio/common.h> +#include <gxio/dma_queue.h> + +#include <arch/trio_constants.h> +#include <arch/trio.h> +#include <arch/trio_pcie_intfc.h> +#include <arch/trio_pcie_rc.h> +#include <arch/trio_shm.h> +#include <hv/drv_trio_intf.h> +#include <hv/iorpc.h> + +/* A context object used to manage TRIO hardware resources. */ +typedef struct { + + /* File descriptor for calling up to Linux (and thus the HV). */ + int fd; + + /* The VA at which the MAC MMIO registers are mapped. */ + char *mmio_base_mac; + + /* The VA at which the PIO config space are mapped for each PCIe MAC. + Gx36 has max 3 PCIe MACs per TRIO shim. */ + char *mmio_base_pio_cfg[TILEGX_TRIO_PCIES]; + +#ifdef USE_SHARED_PCIE_CONFIG_REGION + /* Index of the shared PIO region for PCI config access. */ + int pio_cfg_index; +#else + /* Index of the PIO region for PCI config access per MAC. */ + int pio_cfg_index[TILEGX_TRIO_PCIES]; +#endif + + /* The VA at which the push DMA MMIO registers are mapped. */ + char *mmio_push_dma[TRIO_NUM_PUSH_DMA_RINGS]; + + /* The VA at which the pull DMA MMIO registers are mapped. */ + char *mmio_pull_dma[TRIO_NUM_PUSH_DMA_RINGS]; + + /* Application space ID. */ + unsigned int asid; + +} gxio_trio_context_t; + +/* Command descriptor for push or pull DMA. */ +typedef TRIO_DMA_DESC_t gxio_trio_dma_desc_t; + +/* A convenient, thread-safe interface to an eDMA ring. */ +typedef struct { + + /* State object for tracking head and tail pointers. */ + __gxio_dma_queue_t dma_queue; + + /* The ring entries. */ + gxio_trio_dma_desc_t *dma_descs; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned int log2_num_entries; + +} gxio_trio_dma_queue_t; + +/* Initialize a TRIO context. + * + * This function allocates a TRIO "service domain" and maps the MMIO + * registers into the the caller's VA space. + * + * @param trio_index Which TRIO shim; Gx36 must pass 0. + * @param context Context object to be initialized. + */ +extern int gxio_trio_init(gxio_trio_context_t *context, + unsigned int trio_index); + +/* This indicates that an ASID hasn't been allocated. */ +#define GXIO_ASID_NULL -1 + +/* Ordering modes for map memory regions and scatter queue regions. */ +typedef enum gxio_trio_order_mode_e { + /* Writes are not ordered. Reads always wait for previous writes. */ + GXIO_TRIO_ORDER_MODE_UNORDERED = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED, + /* Both writes and reads wait for previous transactions to complete. */ + GXIO_TRIO_ORDER_MODE_STRICT = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT, + /* Writes are ordered unless the incoming packet has the + relaxed-ordering attributes set. */ + GXIO_TRIO_ORDER_MODE_OBEY_PACKET = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD +} gxio_trio_order_mode_t; + +/* Initialize a memory mapping region. + * + * @param context An initialized TRIO context. + * @param map A Memory map region allocated by gxio_trio_alloc_memory_map(). + * @param target_mem VA of backing memory, should be registered via + * gxio_trio_register_page() and aligned to 4kB. + * @param target_size Length of the memory mapping, must be a multiple + * of 4kB. + * @param asid ASID to be used for Tile-side address translation. + * @param mac MAC number. + * @param bus_address Bus address at which the mapping starts. + * @param order_mode Memory ordering mode for this mapping. + * @return Zero on success, else ::GXIO_TRIO_ERR_BAD_MEMORY_MAP, + * GXIO_TRIO_ERR_BAD_ASID, or ::GXIO_TRIO_ERR_BAD_BUS_RANGE. + */ +extern int gxio_trio_init_memory_map(gxio_trio_context_t *context, + unsigned int map, void *target_mem, + size_t target_size, unsigned int asid, + unsigned int mac, uint64_t bus_address, + gxio_trio_order_mode_t order_mode); + +/* Flags that can be passed to resource allocation functions. */ +enum gxio_trio_alloc_flags_e { + GXIO_TRIO_ALLOC_FIXED = HV_TRIO_ALLOC_FIXED, +}; + +/* Flags that can be passed to memory registration functions. */ +enum gxio_trio_mem_flags_e { + /* Do not fill L3 when writing, and invalidate lines upon egress. */ + GXIO_TRIO_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT, + + /* L3 cache fills should only populate IO cache ways. */ + GXIO_TRIO_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN, +}; + +/* Flag indicating a request generator uses a special traffic + class. */ +#define GXIO_TRIO_FLAG_TRAFFIC_CLASS(N) HV_TRIO_FLAG_TC(N) + +/* Flag indicating a request generator uses a virtual function + number. */ +#define GXIO_TRIO_FLAG_VFUNC(N) HV_TRIO_FLAG_VFUNC(N) + +/***************************************************************** + * Memory Registration * + ******************************************************************/ + +/* Allocate Application Space Identifiers (ASIDs). Each ASID can + * register up to 16 page translations. ASIDs are used by memory map + * regions, scatter queues, and DMA queues to translate application + * VAs into memory system PAs. + * + * @param context An initialized TRIO context. + * @param count Number of ASIDs required. + * @param first Index of first ASID if ::GXIO_TRIO_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits, including bits from ::gxio_trio_alloc_flags_e. + * @return Index of first ASID, or ::GXIO_TRIO_ERR_NO_ASID if allocation + * failed. + */ +extern int gxio_trio_alloc_asids(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +#endif /* ! _GXIO_TRIO_H_ */ diff --git a/arch/tile/include/gxio/uart.h b/arch/tile/include/gxio/uart.h new file mode 100644 index 00000000000..438ee7e46c7 --- /dev/null +++ b/arch/tile/include/gxio/uart.h @@ -0,0 +1,105 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_UART_H_ +#define _GXIO_UART_H_ + +#include "common.h" + +#include <hv/drv_uart_intf.h> +#include <hv/iorpc.h> + +/* + * + * An API for manipulating UART interface. + */ + +/* + * + * The Rshim allows access to the processor's UART interface. + */ + +/* A context object used to manage UART resources. */ +typedef struct { + + /* File descriptor for calling up to the hypervisor. */ + int fd; + + /* The VA at which our MMIO registers are mapped. */ + char *mmio_base; + +} gxio_uart_context_t; + +/* Request UART interrupts. + * + * Request that interrupts be delivered to a tile when the UART's + * Receive FIFO is written, or the Write FIFO is read. + * + * @param context Pointer to a properly initialized gxio_uart_context_t. + * @param bind_cpu_x X coordinate of CPU to which interrupt will be delivered. + * @param bind_cpu_y Y coordinate of CPU to which interrupt will be delivered. + * @param bind_interrupt IPI interrupt number. + * @param bind_event Sub-interrupt event bit number; a negative value can + * disable the interrupt. + * @return Zero if all of the requested UART events were successfully + * configured to interrupt. + */ +extern int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, + int bind_cpu_x, + int bind_cpu_y, + int bind_interrupt, int bind_event); + +/* Initialize a UART context. + * + * A properly initialized context must be obtained before any of the other + * gxio_uart routines may be used. + * + * @param context Pointer to a gxio_uart_context_t, which will be initialized + * by this routine, if it succeeds. + * @param uart_index Index of the UART to use. + * @return Zero if the context was successfully initialized, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_uart_init(gxio_uart_context_t *context, int uart_index); + +/* Destroy a UART context. + * + * Once destroyed, a context may not be used with any gxio_uart routines + * other than gxio_uart_init(). After this routine returns, no further + * interrupts requested on this context will be delivered. The state and + * configuration of the pins which had been attached to this context are + * unchanged by this operation. + * + * @param context Pointer to a gxio_uart_context_t. + * @return Zero if the context was successfully destroyed, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_uart_destroy(gxio_uart_context_t *context); + +/* Write UART register. + * @param context Pointer to a gxio_uart_context_t. + * @param offset UART register offset. + * @param word Data will be wrote to UART reigister. + */ +extern void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset, + uint64_t word); + +/* Read UART register. + * @param context Pointer to a gxio_uart_context_t. + * @param offset UART register offset. + * @return Data read from UART register. + */ +extern uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset); + +#endif /* _GXIO_UART_H_ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h new file mode 100644 index 00000000000..93c9636d2dd --- /dev/null +++ b/arch/tile/include/gxio/usb_host.h @@ -0,0 +1,87 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _GXIO_USB_H_ +#define _GXIO_USB_H_ + +#include <gxio/common.h> + +#include <hv/drv_usb_host_intf.h> +#include <hv/iorpc.h> + +/* + * + * An API for manipulating general-purpose I/O pins. + */ + +/* + * + * The USB shim allows access to the processor's Universal Serial Bus + * connections. + */ + +/* A context object used to manage USB hardware resources. */ +typedef struct { + + /* File descriptor for calling up to the hypervisor. */ + int fd; + + /* The VA at which our MMIO registers are mapped. */ + char *mmio_base; +} gxio_usb_host_context_t; + +/* Initialize a USB context. + * + * A properly initialized context must be obtained before any of the other + * gxio_usb_host routines may be used. + * + * @param context Pointer to a gxio_usb_host_context_t, which will be + * initialized by this routine, if it succeeds. + * @param usb_index Index of the USB shim to use. + * @param is_ehci Nonzero to use the EHCI interface; zero to use the OHCI + * intereface. + * @return Zero if the context was successfully initialized, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, + int is_ehci); + +/* Destroy a USB context. + * + * Once destroyed, a context may not be used with any gxio_usb_host routines + * other than gxio_usb_host_init(). After this routine returns, no further + * interrupts or signals requested on this context will be delivered. The + * state and configuration of the pins which had been attached to this + * context are unchanged by this operation. + * + * @param context Pointer to a gxio_usb_host_context_t. + * @return Zero if the context was successfully destroyed, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); + +/* Retrieve the address of the shim's MMIO registers. + * + * @param context Pointer to a properly initialized gxio_usb_host_context_t. + * @return The address of the shim's MMIO registers. + */ +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); + +/* Retrieve the length of the shim's MMIO registers. + * + * @param context Pointer to a properly initialized gxio_usb_host_context_t. + * @return The length of the shim's MMIO registers. + */ +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); + +#endif /* _GXIO_USB_H_ */ diff --git a/arch/tile/include/hv/drv_mpipe_intf.h b/arch/tile/include/hv/drv_mpipe_intf.h new file mode 100644 index 00000000000..c97e416dd96 --- /dev/null +++ b/arch/tile/include/hv/drv_mpipe_intf.h @@ -0,0 +1,605 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the mpipe driver. + */ + +#ifndef _SYS_HV_DRV_MPIPE_INTF_H +#define _SYS_HV_DRV_MPIPE_INTF_H + +#include <arch/mpipe.h> +#include <arch/mpipe_constants.h> + + +/** Number of mPIPE instances supported */ +#define HV_MPIPE_INSTANCE_MAX (2) + +/** Number of buffer stacks (32). */ +#define HV_MPIPE_NUM_BUFFER_STACKS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH) + +/** Number of NotifRings (256). */ +#define HV_MPIPE_NUM_NOTIF_RINGS (MPIPE_NUM_NOTIF_RINGS) + +/** Number of NotifGroups (32). */ +#define HV_MPIPE_NUM_NOTIF_GROUPS (MPIPE_NUM_NOTIF_GROUPS) + +/** Number of buckets (4160). */ +#define HV_MPIPE_NUM_BUCKETS (MPIPE_NUM_BUCKETS) + +/** Number of "lo" buckets (4096). */ +#define HV_MPIPE_NUM_LO_BUCKETS 4096 + +/** Number of "hi" buckets (64). */ +#define HV_MPIPE_NUM_HI_BUCKETS \ + (HV_MPIPE_NUM_BUCKETS - HV_MPIPE_NUM_LO_BUCKETS) + +/** Number of edma rings (24). */ +#define HV_MPIPE_NUM_EDMA_RINGS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH) + + + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_MPIPE_ALLOC_FIXED 0x01 + +/** Offset for the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_CFG << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_SIZE (64 * 1024) + +/** Offset for the config register MMIO region. */ +#define HV_MPIPE_FAST_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_IDMA << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the fast register MMIO region (IDMA, EDMA, buffer stack). */ +#define HV_MPIPE_FAST_MMIO_SIZE \ + ((MPIPE_MMIO_ADDR__REGION_VAL_BSM + 1 - MPIPE_MMIO_ADDR__REGION_VAL_IDMA) \ + << MPIPE_MMIO_ADDR__REGION_SHIFT) + + +/* + * Each type of resource allocation comes in quantized chunks, where + * XXX_BITS is the number of chunks, and XXX_RES_PER_BIT is the number + * of resources in each chunk. + */ + +/** Number of buffer stack chunks available (32). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH + +/** Granularity of buffer stack allocation (1). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_RES_PER_BIT \ + (HV_MPIPE_NUM_BUFFER_STACKS / HV_MPIPE_ALLOC_BUFFER_STACKS_BITS) + +/** Number of NotifRing chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__NOTIF_RING_MASK_WIDTH + +/** Granularity of NotifRing allocation (8). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_RINGS / HV_MPIPE_ALLOC_NOTIF_RINGS_BITS) + +/** Number of NotifGroup chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS \ + HV_MPIPE_NUM_NOTIF_GROUPS + +/** Granularity of NotifGroup allocation (1). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_GROUPS / HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS) + +/** Number of lo bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_LO_WIDTH + +/** Granularity of lo bucket allocation (256). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_LO_BUCKETS / HV_MPIPE_ALLOC_LO_BUCKETS_BITS) + +/** Number of hi bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_HI_WIDTH + +/** Granularity of hi bucket allocation (4). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_HI_BUCKETS / HV_MPIPE_ALLOC_HI_BUCKETS_BITS) + +/** Number of eDMA ring chunks available (24). */ +#define HV_MPIPE_ALLOC_EDMA_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH + +/** Granularity of eDMA ring allocation (1). */ +#define HV_MPIPE_ALLOC_EDMA_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_EDMA_RINGS / HV_MPIPE_ALLOC_EDMA_RINGS_BITS) + + + + +/** Bit vector encoding which NotifRings are in a NotifGroup. */ +typedef struct +{ + /** The actual bits. */ + uint64_t ring_mask[4]; + +} gxio_mpipe_notif_group_bits_t; + + +/** Another name for MPIPE_LBL_INIT_DAT_BSTS_TBL_t. */ +typedef MPIPE_LBL_INIT_DAT_BSTS_TBL_t gxio_mpipe_bucket_info_t; + + + +/** Eight buffer stack ids. */ +typedef struct +{ + /** The stacks. */ + uint8_t stacks[8]; + +} gxio_mpipe_rules_stacks_t; + + +/** A destination mac address. */ +typedef struct +{ + /** The octets. */ + uint8_t octets[6]; + +} gxio_mpipe_rules_dmac_t; + + +/** A vlan. */ +typedef uint16_t gxio_mpipe_rules_vlan_t; + + + +/** Maximum number of characters in a link name. */ +#define GXIO_MPIPE_LINK_NAME_LEN 32 + + +/** Structure holding a link name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_LINK_NAME_LEN]; +} +_gxio_mpipe_link_name_t; + +/** Maximum number of characters in a symbol name. */ +#define GXIO_MPIPE_SYMBOL_NAME_LEN 128 + + +/** Structure holding a symbol name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_SYMBOL_NAME_LEN]; +} +_gxio_mpipe_symbol_name_t; + + +/** Structure holding a MAC address. */ +typedef struct +{ + /** The address. */ + uint8_t mac[6]; +} +_gxio_mpipe_link_mac_t; + + + +/** Request shared data permission -- that is, the ability to send and + * receive packets -- on the specified link. Other processes may also + * request shared data permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_DATA 0x00000001UL + +/** Do not request data permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_NO_DATA 0x00000002UL + +/** Request exclusive data permission -- that is, the ability to send and + * receive packets -- on the specified link. No other processes may + * request data permission on this link, and if any process already has + * data permission on it, this open will fail. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_DATA 0x00000004UL + +/** Request shared stats permission -- that is, the ability to read and write + * registers which contain link statistics, and to get link attributes -- + * on the specified link. Other processes may also request shared stats + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_STATS 0x00000008UL + +/** Do not request stats permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_NO_STATS 0x00000010UL + +/** Request exclusive stats permission -- that is, the ability to read and + * write registers which contain link statistics, and to get link + * attributes -- on the specified link. No other processes may request + * stats permission on this link, and if any process already + * has stats permission on it, this open will fail. + * + * Requesting exclusive stats permission is normally a very bad idea, since + * it prevents programs like mpipe-stat from providing information on this + * link. Applications should only do this if they use MAC statistics + * registers, and cannot tolerate any of the clear-on-read registers being + * reset by other statistics programs. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_STATS 0x00000020UL + +/** Request shared control permission -- that is, the ability to modify link + * attributes, and read and write MAC and MDIO registers -- on the + * specified link. Other processes may also request shared control + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_CTL 0x00000040UL + +/** Do not request control permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_NO_CTL 0x00000080UL + +/** Request exclusive control permission -- that is, the ability to modify + * link attributes, and read and write MAC and MDIO registers -- on the + * specified link. No other processes may request control permission on + * this link, and if any process already has control permission on it, + * this open will fail. + * + * Requesting exclusive control permission is not always a good idea, since + * it prevents programs like mpipe-link from configuring the link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_CTL 0x00000100UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; do not + * change the desired state of the link when it is closed or the process + * exits. No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_UP 0x00000200UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; when the + * link is closed or this process exits, if no other process has the link + * open, set the desired state of the link to down. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specifed in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_UPDOWN 0x00000400UL + +/** Do not change the desired state of the link as part of the open + * operation; when the link is closed or this process exits, if no other + * process has the link open, set the desired state of the link to down. + * No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_DOWN 0x00000800UL + +/** Do not change the desired state of the link as part of the open + * operation; do not change the desired state of the link when it is + * closed or the process exits. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specifed in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_NONE 0x00001000UL + +/** Request that this open call not complete until the network link is up. + * The process will wait as long as necessary for this to happen; + * applications which wish to abandon waiting for the link after a + * specific time period should not specify this flag when opening a link, + * but should instead call gxio_mpipe_link_wait() afterward. The link + * must be opened with stats permission. Note that this flag by itself + * does not change the desired link state; if other open flags or previous + * link state changes have not requested a desired state of up, the open + * call will never complete. This flag is not available to kernel + * clients. + */ +#define GXIO_MPIPE_LINK_WAIT 0x00002000UL + + +/* + * Note: link attributes must fit in 24 bits, since we use the top 8 bits + * of the IORPC offset word for the channel number. + */ + +/** Determine whether jumbo frames may be received. If this attribute's + * value value is nonzero, the MAC will accept frames of up to 10240 bytes. + * If the value is zero, the MAC will only accept frames of up to 1544 + * bytes. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_JUMBO 0x010000 + +/** Determine whether to send pause frames on this link if the mPIPE packet + * FIFO is nearly full. If the value is zero, pause frames are not sent. + * If the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. + * + * Bear in mind that in almost all circumstances, the mPIPE packet FIFO + * will never fill up, since mPIPE will empty it as fast as or faster than + * the incoming data rate, by either delivering or dropping packets. The + * only situation in which this is not true is if the memory and cache + * subsystem is extremely heavily loaded, and mPIPE cannot perform DMA of + * packet data to memory in a timely fashion. In particular, pause frames + * will <em>not</em> be sent if packets cannot be delivered because + * NotifRings are full, buckets are full, or buffers are not available in + * a buffer stack. */ +#define GXIO_MPIPE_LINK_SEND_PAUSE 0x020000 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, mPIPE shim will suspend output on the link's + * channel when a pause frame is received. If the value is zero, pause + * frames will be ignored. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_PAUSE 0x030000 + +/** Interface MAC address. The value is a 6-byte MAC address, in the least + * significant 48 bits of the value; in other words, an address which would + * be printed as '12:34:56:78:90:AB' in IEEE 802 canonical format would + * be returned as 0x12345678ab. + * + * Depending upon the overall system design, a MAC address may or may not + * be available for each interface. Note that the interface's MAC address + * does not limit the packets received on its channel, although the + * classifier's rules could be configured to do that. Similarly, the MAC + * address is not used when transmitting packets, although applications + * could certainly decide to use the assigned address as a source MAC + * address when doing so. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. + */ +#define GXIO_MPIPE_LINK_MAC 0x040000 + +/** Determine whether to discard egress packets on link down. If this value + * is nonzero, packets sent on this link while the link is down will be + * discarded. If this value is zero, no packets will be sent on this link + * while it is down. The default value is one. */ +#define GXIO_MPIPE_LINK_DISCARD_IF_DOWN 0x050000 + +/** Possible link state. The value is a combination of link state flags, + * ORed together, that indicate link modes which are actually supported by + * the hardware. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. */ +#define GXIO_MPIPE_LINK_POSSIBLE_STATE 0x060000 + +/** Current link state. The value is a combination of link state flags, + * ORed together, that indicate the current state of the hardware. If the + * link is down, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will be zero; + * if the link is up, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will + * result in exactly one of the speed values, indicating the current speed. + * This attribute may only be retrieved with gxio_mpipe_link_get_attr(); it + * may not be modified. */ +#define GXIO_MPIPE_LINK_CURRENT_STATE 0x070000 + +/** Desired link state. The value is a conbination of flags, which specify + * the desired state for the link. With gxio_mpipe_link_set_attr(), this + * will, in the background, attempt to bring up the link using whichever of + * the requested flags are reasonable, or take down the link if the flags + * are zero. The actual link up or down operation may happen after this + * call completes. If the link state changes in the future, the system + * will continue to try to get back to the desired link state; for + * instance, if the link is brought up successfully, and then the network + * cable is disconnected, the link will go down. However, the desired + * state of the link is still up, so if the cable is reconnected, the link + * will be brought up again. + * + * With gxio_mpipe_link_set_attr(), this will indicate the desired state + * for the link, as set with a previous gxio_mpipe_link_set_attr() call, + * or implicitly by a gxio_mpipe_link_open() or link close operation. + * This may not reflect the current state of the link; to get that, use + * ::GXIO_MPIPE_LINK_CURRENT_STATE. + */ +#define GXIO_MPIPE_LINK_DESIRED_STATE 0x080000 + + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define GXIO_MPIPE_LINK_10M 0x0000000000000001UL + +/** Link can run, should run, or is running at 100 Mbps. */ +#define GXIO_MPIPE_LINK_100M 0x0000000000000002UL + +/** Link can run, should run, or is running at 1 Gbps. */ +#define GXIO_MPIPE_LINK_1G 0x0000000000000004UL + +/** Link can run, should run, or is running at 10 Gbps. */ +#define GXIO_MPIPE_LINK_10G 0x0000000000000008UL + +/** Link can run, should run, or is running at 20 Gbps. */ +#define GXIO_MPIPE_LINK_20G 0x0000000000000010UL + +/** Link can run, should run, or is running at 25 Gbps. */ +#define GXIO_MPIPE_LINK_25G 0x0000000000000020UL + +/** Link can run, should run, or is running at 50 Gbps. */ +#define GXIO_MPIPE_LINK_50G 0x0000000000000040UL + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define GXIO_MPIPE_LINK_ANYSPEED 0x0000000000000800UL + +/** All legal link speeds. This value is provided for use in extracting + * the speed-related subset of the link state flags; it is not intended + * to be set directly as a value for one of the GXIO_MPIPE_LINK_xxx_STATE + * attributes. A link is up or is requested to be up if its current or + * desired state, respectively, ANDED with this value, is nonzero. */ +#define GXIO_MPIPE_LINK_SPEED_MASK 0x0000000000000FFFUL + +/** Link can run, should run, or is running in MAC loopback mode. This + * loops transmitted packets back to the receiver, inside the Tile + * Processor. */ +#define GXIO_MPIPE_LINK_LOOP_MAC 0x0000000000001000UL + +/** Link can run, should run, or is running in PHY loopback mode. This + * loops transmitted packets back to the receiver, inside the external + * PHY chip. */ +#define GXIO_MPIPE_LINK_LOOP_PHY 0x0000000000002000UL + +/** Link can run, should run, or is running in external loopback mode. + * This requires that an external loopback plug be installed on the + * Ethernet port. Note that only some links require that this be + * configured via the gxio_mpipe_link routines; other links can do + * external loopack with the plug and no special configuration. */ +#define GXIO_MPIPE_LINK_LOOP_EXT 0x0000000000004000UL + +/** All legal loopback types. */ +#define GXIO_MPIPE_LINK_LOOP_MASK 0x000000000000F000UL + +/** Link can run, should run, or is running in full-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_FDX 0x0000000000010000UL + +/** Link can run, should run, or is running in half-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_HDX 0x0000000000020000UL + + +/** An individual rule. */ +typedef struct +{ + /** The total size. */ + uint16_t size; + + /** The priority. */ + int16_t priority; + + /** The "headroom" in each buffer. */ + uint8_t headroom; + + /** The "tailroom" in each buffer. */ + uint8_t tailroom; + + /** The "capacity" of the largest buffer. */ + uint16_t capacity; + + /** The mask for converting a flow hash into a bucket. */ + uint16_t bucket_mask; + + /** The offset for converting a flow hash into a bucket. */ + uint16_t bucket_first; + + /** The buffer stack ids. */ + gxio_mpipe_rules_stacks_t stacks; + + /** The actual channels. */ + uint32_t channel_bits; + + /** The number of dmacs. */ + uint16_t num_dmacs; + + /** The number of vlans. */ + uint16_t num_vlans; + + /** The actual dmacs and vlans. */ + uint8_t dmacs_and_vlans[]; + +} gxio_mpipe_rules_rule_t; + + +/** A list of classifier rules. */ +typedef struct +{ + /** The offset to the end of the current rule. */ + uint16_t tail; + + /** The offset to the start of the current rule. */ + uint16_t head; + + /** The actual rules. */ + uint8_t rules[4096 - 4]; + +} gxio_mpipe_rules_list_t; + + + + +/** mPIPE statistics structure. These counters include all relevant + * events occurring on all links within the mPIPE shim. */ +typedef struct +{ + /** Number of ingress packets dropped for any reason. */ + uint64_t ingress_drops; + /** Number of ingress packets dropped because a buffer stack was empty. */ + uint64_t ingress_drops_no_buf; + /** Number of ingress packets dropped or truncated due to lack of space in + * the iPkt buffer. */ + uint64_t ingress_drops_ipkt; + /** Number of ingress packets dropped by the classifier or load balancer */ + uint64_t ingress_drops_cls_lb; + /** Total number of ingress packets. */ + uint64_t ingress_packets; + /** Total number of egress packets. */ + uint64_t egress_packets; + /** Total number of ingress bytes. */ + uint64_t ingress_bytes; + /** Total number of egress bytes. */ + uint64_t egress_bytes; +} +gxio_mpipe_stats_t; + + +#endif /* _SYS_HV_DRV_MPIPE_INTF_H */ diff --git a/arch/tile/include/hv/drv_trio_intf.h b/arch/tile/include/hv/drv_trio_intf.h new file mode 100644 index 00000000000..237e04dee66 --- /dev/null +++ b/arch/tile/include/hv/drv_trio_intf.h @@ -0,0 +1,199 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the trio driver. + */ + +#ifndef _SYS_HV_DRV_TRIO_INTF_H +#define _SYS_HV_DRV_TRIO_INTF_H + +#include <arch/trio.h> + +/** The vendor ID for all Tilera processors. */ +#define TILERA_VENDOR_ID 0x1a41 + +/** The device ID for the Gx36 processor. */ +#define TILERA_GX36_DEV_ID 0x0200 + +/** Device ID for our internal bridge when running as RC. */ +#define TILERA_GX36_RC_DEV_ID 0x2000 + +/** Maximum number of TRIO interfaces. */ +#define TILEGX_NUM_TRIO 2 + +/** Gx36 has max 3 PCIe MACs per TRIO interface. */ +#define TILEGX_TRIO_PCIES 3 + +/** Specify port properties for a PCIe MAC. */ +struct pcie_port_property +{ + /** If true, the link can be configured in PCIe root complex mode. */ + uint8_t allow_rc: 1; + + /** If true, the link can be configured in PCIe endpoint mode. */ + uint8_t allow_ep: 1; + + /** If true, the link can be configured in StreamIO mode. */ + uint8_t allow_sio: 1; + + /** If true, the link is allowed to support 1-lane operation. Software + * will not consider it an error if the link comes up as a x1 link. */ + uint8_t allow_x1: 1; + + /** If true, the link is allowed to support 2-lane operation. Software + * will not consider it an error if the link comes up as a x2 link. */ + uint8_t allow_x2: 1; + + /** If true, the link is allowed to support 4-lane operation. Software + * will not consider it an error if the link comes up as a x4 link. */ + uint8_t allow_x4: 1; + + /** If true, the link is allowed to support 8-lane operation. Software + * will not consider it an error if the link comes up as a x8 link. */ + uint8_t allow_x8: 1; + + /** If true, this link is connected to a device which may or may not + * be present. */ + uint8_t removable: 1; + +}; + +/** Configurations can be issued to configure a char stream interrupt. */ +typedef enum pcie_stream_intr_config_sel_e +{ + /** Interrupt configuration for memory map regions. */ + MEM_MAP_SEL, + + /** Interrupt configuration for push DMAs. */ + PUSH_DMA_SEL, + + /** Interrupt configuration for pull DMAs. */ + PULL_DMA_SEL, +} +pcie_stream_intr_config_sel_t; + + +/** The mmap file offset (PA) of the TRIO config region. */ +#define HV_TRIO_CONFIG_OFFSET \ + ((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_CFG << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) + +/** The maximum size of the TRIO config region. */ +#define HV_TRIO_CONFIG_SIZE \ + (1ULL << TRIO_CFG_REGION_ADDR__REGION_SHIFT) + +/** Size of the config region mapped into client. We can't use + * TRIO_MMIO_ADDRESS_SPACE__OFFSET_WIDTH because it + * will require the kernel to allocate 4GB VA space + * from the VMALLOC region which has a total range + * of 4GB. + */ +#define HV_TRIO_CONFIG_IOREMAP_SIZE \ + ((uint64_t) 1 << TRIO_CFG_REGION_ADDR__PROT_SHIFT) + +/** The mmap file offset (PA) of a scatter queue region. */ +#define HV_TRIO_SQ_OFFSET(queue) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_SQ << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((queue) << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT)) + +/** The maximum size of a scatter queue region. */ +#define HV_TRIO_SQ_SIZE \ + (1ULL << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT) + + +/** The "hardware MMIO region" of the first PIO region. */ +#define HV_TRIO_FIRST_PIO_REGION 8 + +/** The mmap file offset (PA) of a PIO region. */ +#define HV_TRIO_PIO_OFFSET(region) \ + (((unsigned long long)(region) + HV_TRIO_FIRST_PIO_REGION) \ + << TRIO_PIO_REGIONS_ADDR__REGION_SHIFT) + +/** The maximum size of a PIO region. */ +#define HV_TRIO_PIO_SIZE (1ULL << TRIO_PIO_REGIONS_ADDR__ADDR_WIDTH) + + +/** The mmap file offset (PA) of a push DMA region. */ +#define HV_TRIO_PUSH_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PUSH_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The mmap file offset (PA) of a pull DMA region. */ +#define HV_TRIO_PULL_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PULL_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PULL_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The maximum size of a DMA region. */ +#define HV_TRIO_DMA_REGION_SIZE \ + (1ULL << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT) + + +/** The mmap file offset (PA) of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_OFFSET(map) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_MEM << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((map) << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT)) + +/** The maximum size of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_SIZE \ + (1ULL << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT) + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_TRIO_ALLOC_FIXED 0x01 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SHIFT 12 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SIZE (1ull << HV_TRIO_PAGE_SHIFT) + + +/* Specify all PCIe port properties for a TRIO. */ +struct pcie_trio_ports_property +{ + struct pcie_port_property ports[TILEGX_TRIO_PCIES]; + + /** Set if this TRIO belongs to a Gx72 device. */ + uint8_t is_gx72; +}; + +/* Flags indicating traffic class. */ +#define HV_TRIO_FLAG_TC_SHIFT 4 +#define HV_TRIO_FLAG_TC_RMASK 0xf +#define HV_TRIO_FLAG_TC(N) \ + ((((N) & HV_TRIO_FLAG_TC_RMASK) + 1) << HV_TRIO_FLAG_TC_SHIFT) + +/* Flags indicating virtual functions. */ +#define HV_TRIO_FLAG_VFUNC_SHIFT 8 +#define HV_TRIO_FLAG_VFUNC_RMASK 0xff +#define HV_TRIO_FLAG_VFUNC(N) \ + ((((N) & HV_TRIO_FLAG_VFUNC_RMASK) + 1) << HV_TRIO_FLAG_VFUNC_SHIFT) + + +/* Flag indicating an ordered PIO region. */ +#define HV_TRIO_PIO_FLAG_ORDERED (1 << 16) + +/* Flags indicating special types of PIO regions. */ +#define HV_TRIO_PIO_FLAG_SPACE_SHIFT 17 +#define HV_TRIO_PIO_FLAG_SPACE_MASK (0x3 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_CONFIG_SPACE (0x1 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_IO_SPACE (0x2 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) + + +#endif /* _SYS_HV_DRV_TRIO_INTF_H */ diff --git a/arch/tile/include/hv/drv_uart_intf.h b/arch/tile/include/hv/drv_uart_intf.h new file mode 100644 index 00000000000..f5379e2404f --- /dev/null +++ b/arch/tile/include/hv/drv_uart_intf.h @@ -0,0 +1,33 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the UART driver. + */ + +#ifndef _SYS_HV_DRV_UART_INTF_H +#define _SYS_HV_DRV_UART_INTF_H + +#include <arch/uart.h> + +/** Number of UART ports supported. */ +#define TILEGX_UART_NR 2 + +/** The mmap file offset (PA) of the UART MMIO region. */ +#define HV_UART_MMIO_OFFSET 0 + +/** The maximum size of the UARTs MMIO region (64K Bytes). */ +#define HV_UART_MMIO_SIZE (1UL << 16) + +#endif /* _SYS_HV_DRV_UART_INTF_H */ diff --git a/arch/tile/include/hv/drv_usb_host_intf.h b/arch/tile/include/hv/drv_usb_host_intf.h new file mode 100644 index 00000000000..24ce774a3f1 --- /dev/null +++ b/arch/tile/include/hv/drv_usb_host_intf.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the USB host driver. + */ + +#ifndef _SYS_HV_DRV_USB_HOST_INTF_H +#define _SYS_HV_DRV_USB_HOST_INTF_H + +#include <arch/usb_host.h> + + +/** Offset for the EHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_EHCI ((uint64_t) USB_HOST_HCCAPBASE_REG) + +/** Offset for the OHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_OHCI ((uint64_t) USB_HOST_OHCD_HC_REVISION_REG) + +/** Size of the register MMIO region. This turns out to be the same for + * both EHCI and OHCI. */ +#define HV_USB_HOST_MMIO_SIZE ((uint64_t) 0x1000) + +/** The number of service domains supported by the USB host shim. */ +#define HV_USB_HOST_NUM_SVC_DOM 1 + + +#endif /* _SYS_HV_DRV_USB_HOST_INTF_H */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h index f13188ac281..2a20b266d94 100644 --- a/arch/tile/include/hv/drv_xgbe_intf.h +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -460,7 +460,7 @@ typedef void* lepp_comp_t; * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for * our page size of exactly 65536. We add one for a "body" fragment. */ -#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) +#define LEPP_MAX_FRAGS (65536 / HV_DEFAULT_PAGE_SIZE_SMALL + 2 + 1) /** Total number of bytes needed for an lepp_tso_cmd_t. */ #define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index 72ec1e972f1..dfcdeb61ba3 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -17,8 +17,8 @@ * The hypervisor's public API. */ -#ifndef _TILE_HV_H -#define _TILE_HV_H +#ifndef _HV_HV_H +#define _HV_HV_H #include <arch/chip.h> @@ -42,25 +42,45 @@ */ #define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN) -/** The log2 of the size of small pages, in bytes. This value should - * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). +/** The log2 of the initial size of small pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_SMALL. */ -#define HV_LOG2_PAGE_SIZE_SMALL 16 +#define HV_LOG2_DEFAULT_PAGE_SIZE_SMALL 16 -/** The size of small pages, in bytes. This value should be verified +/** The initial size of small pages, in bytes. This value should be verified * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). + * It may also be modified when installing a new context. */ -#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL) +#define HV_DEFAULT_PAGE_SIZE_SMALL \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_SMALL) -/** The log2 of the size of large pages, in bytes. This value should be - * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). +/** The log2 of the initial size of large pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_LARGE. */ -#define HV_LOG2_PAGE_SIZE_LARGE 24 +#define HV_LOG2_DEFAULT_PAGE_SIZE_LARGE 24 -/** The size of large pages, in bytes. This value should be verified +/** The initial size of large pages, in bytes. This value should be verified * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). + * It may also be modified when installing a new context. */ -#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE) +#define HV_DEFAULT_PAGE_SIZE_LARGE \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE) + +#if CHIP_VA_WIDTH() > 32 + +/** The log2 of the initial size of jumbo pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_JUMBO. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32 + +/** The initial size of jumbo pages, in bytes. This value should + * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO). + * It may also be modified when installing a new context. + */ +#define HV_DEFAULT_PAGE_SIZE_JUMBO \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO) + +#endif /** The log2 of the granularity at which page tables must be aligned; * in other words, the CPA for a page table must have this many zero @@ -87,7 +107,22 @@ #define HV_DISPATCH_ENTRY_SIZE 32 /** Version of the hypervisor interface defined by this file */ -#define _HV_VERSION 11 +#define _HV_VERSION 13 + +/** Last version of the hypervisor interface with old hv_init() ABI. + * + * The change from version 12 to version 13 corresponds to launching + * the client by default at PL2 instead of PL1 (corresponding to the + * hv itself running at PL3 instead of PL2). To make this explicit, + * the hv_init() API was also extended so the client can report its + * desired PL, resulting in a more helpful failure diagnostic. If you + * call hv_init() with _HV_VERSION_OLD_HV_INIT and omit the client_pl + * argument, the hypervisor will assume client_pl = 1. + * + * Note that this is a deprecated solution and we do not expect to + * support clients of the Tilera hypervisor running at PL1 indefinitely. + */ +#define _HV_VERSION_OLD_HV_INIT 12 /* Index into hypervisor interface dispatch code blocks. * @@ -280,8 +315,14 @@ #define HV_DISPATCH_GET_IPI_PTE 56 #endif +/** hv_set_pte_super_shift */ +#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57 + +/** hv_console_set_ipi */ +#define HV_DISPATCH_CONSOLE_SET_IPI 63 + /** One more than the largest dispatch value */ -#define _HV_DISPATCH_END 57 +#define _HV_DISPATCH_END 64 #ifndef __ASSEMBLER__ @@ -354,7 +395,11 @@ typedef int HV_Errno; #ifndef __ASSEMBLER__ /** Pass HV_VERSION to hv_init to request this version of the interface. */ -typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber; +typedef enum { + HV_VERSION = _HV_VERSION, + HV_VERSION_OLD_HV_INIT = _HV_VERSION_OLD_HV_INIT, + +} HV_VersionNumber; /** Initializes the hypervisor. * @@ -362,9 +407,11 @@ typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber; * that this program expects, typically HV_VERSION. * @param chip_num Architecture number of the chip the client was built for. * @param chip_rev_num Revision number of the chip the client was built for. + * @param client_pl Privilege level the client is built for + * (not required if interface_version_number == HV_VERSION_OLD_HV_INIT). */ void hv_init(HV_VersionNumber interface_version_number, - int chip_num, int chip_rev_num); + int chip_num, int chip_rev_num, int client_pl); /** Queries we can make for hv_sysconf(). @@ -401,7 +448,18 @@ typedef enum { * that the temperature has hit an upper limit and is no longer being * accurately tracked. */ - HV_SYSCONF_BOARD_TEMP = 6 + HV_SYSCONF_BOARD_TEMP = 6, + + /** Legal page size bitmask for hv_install_context(). + * For example, if 16KB and 64KB small pages are supported, + * it would return "HV_CTX_PG_SM_16K | HV_CTX_PG_SM_64K". + */ + HV_SYSCONF_VALID_PAGE_SIZES = 7, + + /** The size of jumbo pages, in bytes. + * If no jumbo pages are available, zero will be returned. + */ + HV_SYSCONF_PAGE_SIZE_JUMBO = 8, } HV_SysconfQuery; @@ -474,14 +532,36 @@ typedef enum { HV_CONFSTR_SWITCH_CONTROL = 14, /** Chip revision level. */ - HV_CONFSTR_CHIP_REV = 15 + HV_CONFSTR_CHIP_REV = 15, + + /** CPU module part number. */ + HV_CONFSTR_CPUMOD_PART_NUM = 16, + + /** CPU module serial number. */ + HV_CONFSTR_CPUMOD_SERIAL_NUM = 17, + + /** CPU module revision level. */ + HV_CONFSTR_CPUMOD_REV = 18, + + /** Human-readable CPU module description. */ + HV_CONFSTR_CPUMOD_DESC = 19, + + /** Per-tile hypervisor statistics. When this identifier is specified, + * the hv_confstr call takes two extra arguments. The first is the + * HV_XY_TO_LOTAR of the target tile's coordinates. The second is + * a flag word. The only current flag is the lowest bit, which means + * "zero out the stats instead of retrieving them"; in this case the + * buffer and buffer length are ignored. */ + HV_CONFSTR_HV_STATS = 20 } HV_ConfstrQuery; /** Query a configuration string from the hypervisor. * * @param query Identifier for the specific string to be retrieved - * (HV_CONFSTR_xxx). + * (HV_CONFSTR_xxx). Some strings may require or permit extra + * arguments to be appended which select specific objects to be + * described; see the string descriptions above. * @param buf Buffer in which to place the string. * @param len Length of the buffer. * @return If query is valid, then the length of the corresponding string, @@ -489,7 +569,7 @@ typedef enum { * was truncated. If query is invalid, HV_EINVAL. If the specified * buffer is not writable by the client, HV_EFAULT. */ -int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len); +int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len, ...); /** Tile coordinate */ typedef struct @@ -513,6 +593,30 @@ typedef struct */ int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte); +/** Configure the console interrupt. + * + * When the console client interrupt is enabled, the hypervisor will + * deliver the specified IPI to the client in the following situations: + * + * - The console has at least one character available for input. + * + * - The console can accept new characters for output, and the last call + * to hv_console_write() did not write all of the characters requested + * by the client. + * + * Note that in some system configurations, console interrupt will not + * be available; clients should be prepared for this routine to fail and + * to fall back to periodic console polling in that case. + * + * @param ipi Index of the IPI register which will receive the interrupt. + * @param event IPI event number for console interrupt. If less than 0, + * disable the console IPI interrupt. + * @param coord Tile to be targeted for console interrupt. + * @return 0 on success, otherwise, HV_EINVAL if illegal parameter, + * HV_ENOTSUP if console interrupt are not available. + */ +int hv_console_set_ipi(int ipi, int event, HV_Coord coord); + #else /* !CHIP_HAS_IPI() */ /** A set of interrupts. */ @@ -649,6 +753,12 @@ void hv_set_rtc(HV_RTCTime time); * new page table does not need to contain any mapping for the * hv_install_context address itself. * + * At most one HV_CTX_PG_SM_* flag may be specified in "flags"; + * if multiple flags are specified, HV_EINVAL is returned. + * Specifying none of the flags results in using the default page size. + * All cores participating in a given client must request the same + * page size, or the results are undefined. + * * @param page_table Root of the page table. * @param access PTE providing info on how to read the page table. This * value must be consistent between multiple tiles sharing a page table, @@ -667,8 +777,36 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, #define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from PL0. */ +#define HV_CTX_PG_SM_4K 0x10 /**< Use 4K small pages, if available. */ +#define HV_CTX_PG_SM_16K 0x20 /**< Use 16K small pages, if available. */ +#define HV_CTX_PG_SM_64K 0x40 /**< Use 64K small pages, if available. */ +#define HV_CTX_PG_SM_MASK 0xf0 /**< Mask of all possible small pages. */ + #ifndef __ASSEMBLER__ + +/** Set the number of pages ganged together by HV_PTE_SUPER at a + * particular level of the page table. + * + * The current TILE-Gx hardware only supports powers of four + * (i.e. log2_count must be a multiple of two), and the requested + * "super" page size must be less than the span of the next level in + * the page table. The largest size that can be requested is 64GB. + * + * The shift value is initially "0" for all page table levels, + * indicating that the HV_PTE_SUPER bit is effectively ignored. + * + * If you change the count from one non-zero value to another, the + * hypervisor will flush the entire TLB and TSB to avoid confusion. + * + * @param level Page table level (0, 1, or 2) + * @param log2_count Base-2 log of the number of pages to gang together, + * i.e. how much to shift left the base page size for the super page size. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_set_pte_super_shift(int level, int log2_count); + + /** Value returned from hv_inquire_context(). */ typedef struct { @@ -1238,11 +1376,14 @@ HV_Errno hv_set_command_line(HV_VirtAddr buf, int length); * with the existing priority pages) or "red/black" (if they don't). * The bitmask provides information on which parts of the cache * have been used for pinned pages so far on this tile; if (1 << N) - * appears in the bitmask, that indicates that a page has been marked - * "priority" whose PFN equals N, mod 8. + * appears in the bitmask, that indicates that a 4KB region of the + * cache starting at (N * 4KB) is in use by a "priority" page. + * The portion of cache used by a particular page can be computed + * by taking the page's PA, modulo CHIP_L2_CACHE_SIZE(), and setting + * all the "4KB" bits corresponding to the actual page size. * @param bitmask A bitmap of priority page set values */ -void hv_set_caching(unsigned int bitmask); +void hv_set_caching(unsigned long bitmask); /** Zero out a specified number of pages. @@ -1851,12 +1992,12 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, #define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ #define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ #define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ - /* Bits 13-15 are reserved for + /* Bits 13-14 are reserved for future use. */ +#define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */ #define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ #define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ - /* Bit 19 is reserved for - future use. */ +#define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */ #define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits of word */ #define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */ @@ -1869,15 +2010,6 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, of word */ #define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */ -/** Position of the PFN field within the PTE (subset of the PTFN). */ -#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \ - HV_LOG2_PAGE_TABLE_ALIGN)) - -/** Length of the PFN field within the PTE (subset of the PTFN). */ -#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \ - (HV_LOG2_PAGE_SIZE_SMALL - \ - HV_LOG2_PAGE_TABLE_ALIGN)) - /* * Legal values for the PTE's mode field */ @@ -1957,7 +2089,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, /** Does this PTE map a page? * - * If this bit is set in the level-1 page table, the entry should be + * If this bit is set in a level-0 page table, the entry should be + * interpreted as a level-2 page table entry mapping a jumbo page. + * + * If this bit is set in a level-1 page table, the entry should be * interpreted as a level-2 page table entry mapping a large page. * * This bit should not be modified by the client while PRESENT is set, as @@ -1967,6 +2102,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, */ #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) +/** Does this PTE implicitly reference multiple pages? + * + * If this bit is set in the page table (either in the level-2 page table, + * or in a higher level page table in conjunction with the PAGE bit) + * then the PTE specifies a range of contiguous pages, not a single page. + * The hv_set_pte_super_shift() allows you to specify the count for + * each level of the page table. + * + * Note: this bit is not supported on TILEPro systems. + */ +#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER) + /** Is this a global (non-ASID) mapping? * * If this bit is set, the translations established by this PTE will @@ -2046,6 +2193,13 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, */ #define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1) +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT2 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT2) + /** Non-coherent (NC) bit in PTE. * * If this bit is set, the mapping that is set up will be non-coherent @@ -2178,8 +2332,10 @@ hv_pte_clear_##name(HV_PTE pte) \ */ _HV_BIT(present, PRESENT) _HV_BIT(page, PAGE) +_HV_BIT(super, SUPER) _HV_BIT(client0, CLIENT0) _HV_BIT(client1, CLIENT1) +_HV_BIT(client2, CLIENT2) _HV_BIT(migrating, MIGRATING) _HV_BIT(nc, NC) _HV_BIT(readable, READABLE) @@ -2222,40 +2378,11 @@ hv_pte_set_mode(HV_PTE pte, unsigned int val) * * This field contains the upper bits of the CPA (client physical * address) of the target page; the complete CPA is this field with - * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it. - * - * For PTEs in a level-1 page table where the Page bit is set, the - * CPA must be aligned modulo the large page size. - */ -static __inline unsigned int -hv_pte_get_pfn(const HV_PTE pte) -{ - return pte.val >> HV_PTE_INDEX_PFN; -} - - -/** Set the page frame number into a PTE. See hv_pte_get_pfn. */ -static __inline HV_PTE -hv_pte_set_pfn(HV_PTE pte, unsigned int val) -{ - /* - * Note that the use of "PTFN" in the next line is intentional; we - * don't want any garbage lower bits left in that field. - */ - pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN); - pte.val |= (__hv64) val << HV_PTE_INDEX_PFN; - return pte; -} - -/** Get the page table frame number from the PTE. + * HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it. * - * This field contains the upper bits of the CPA (client physical - * address) of the target page table; the complete CPA is this field with - * with HV_PAGE_TABLE_ALIGN zero bits appended to it. - * - * For PTEs in a level-1 page table when the Page bit is not set, the - * CPA must be aligned modulo the sticter of HV_PAGE_TABLE_ALIGN and - * the level-2 page table size. + * For all PTEs in the lowest-level page table, and for all PTEs with + * the Page bit set in all page tables, the CPA must be aligned modulo + * the relevant page size. */ static __inline unsigned long hv_pte_get_ptfn(const HV_PTE pte) @@ -2263,7 +2390,6 @@ hv_pte_get_ptfn(const HV_PTE pte) return pte.val >> HV_PTE_INDEX_PTFN; } - /** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */ static __inline HV_PTE hv_pte_set_ptfn(HV_PTE pte, unsigned long val) @@ -2273,6 +2399,20 @@ hv_pte_set_ptfn(HV_PTE pte, unsigned long val) return pte; } +/** Get the client physical address from the PTE. See hv_pte_set_ptfn. */ +static __inline HV_PhysAddr +hv_pte_get_pa(const HV_PTE pte) +{ + return (__hv64) hv_pte_get_ptfn(pte) << HV_LOG2_PAGE_TABLE_ALIGN; +} + +/** Set the client physical address into a PTE. See hv_pte_get_ptfn. */ +static __inline HV_PTE +hv_pte_set_pa(HV_PTE pte, HV_PhysAddr pa) +{ + return hv_pte_set_ptfn(pte, pa >> HV_LOG2_PAGE_TABLE_ALIGN); +} + /** Get the remote tile caching this page. * @@ -2308,28 +2448,20 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val) #endif /* !__ASSEMBLER__ */ -/** Converts a client physical address to a pfn. */ -#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL) - -/** Converts a pfn to a client physical address. */ -#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL) - /** Converts a client physical address to a ptfn. */ #define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN) /** Converts a ptfn to a client physical address. */ #define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN) -/** Converts a ptfn to a pfn. */ -#define HV_PTFN_TO_PFN(p) \ - ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) - -/** Converts a pfn to a ptfn. */ -#define HV_PFN_TO_PTFN(p) \ - ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) - #if CHIP_VA_WIDTH() > 32 +/* + * Note that we currently do not allow customizing the page size + * of the L0 pages, but fix them at 4GB, so we do not use the + * "_HV_xxx" nomenclature for the L0 macros. + */ + /** Log number of HV_PTE entries in L0 page table */ #define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN) @@ -2359,69 +2491,104 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val) #endif /* CHIP_VA_WIDTH() > 32 */ /** Log number of HV_PTE entries in L1 page table */ -#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE) +#define _HV_LOG2_L1_ENTRIES(log2_page_size_large) \ + (HV_LOG2_L1_SPAN - log2_page_size_large) /** Number of HV_PTE entries in L1 page table */ -#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES) +#define _HV_L1_ENTRIES(log2_page_size_large) \ + (1 << _HV_LOG2_L1_ENTRIES(log2_page_size_large)) /** Log size of L1 page table in bytes */ -#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES) +#define _HV_LOG2_L1_SIZE(log2_page_size_large) \ + (HV_LOG2_PTE_SIZE + _HV_LOG2_L1_ENTRIES(log2_page_size_large)) /** Size of L1 page table in bytes */ -#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE) +#define _HV_L1_SIZE(log2_page_size_large) \ + (1 << _HV_LOG2_L1_SIZE(log2_page_size_large)) /** Log number of HV_PTE entries in level-2 page table */ -#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL) +#define _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (log2_page_size_large - log2_page_size_small) /** Number of HV_PTE entries in level-2 page table */ -#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES) +#define _HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (1 << _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) /** Log size of level-2 page table in bytes */ -#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES) +#define _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (HV_LOG2_PTE_SIZE + \ + _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) /** Size of level-2 page table in bytes */ -#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE) +#define _HV_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (1 << _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small)) #ifdef __ASSEMBLER__ #if CHIP_VA_WIDTH() > 32 /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large) & (_HV_L1_ENTRIES(log2_page_size_large) - 1)) #else /* CHIP_VA_WIDTH() > 32 */ /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_LARGE)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large)) #endif /* CHIP_VA_WIDTH() > 32 */ /** Index in level-2 page table for a specific VA */ -#define HV_L2_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) #else /* __ASSEMBLER __ */ #if CHIP_VA_WIDTH() > 32 /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large) & \ + (_HV_L1_ENTRIES(log2_page_size_large) - 1)) #else /* CHIP_VA_WIDTH() > 32 */ /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large)) #endif /* CHIP_VA_WIDTH() > 32 */ /** Index in level-2 page table for a specific VA */ -#define HV_L2_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((HV_VirtAddr)(va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) #endif /* __ASSEMBLER __ */ -#endif /* _TILE_HV_H */ +/** Position of the PFN field within the PTE (subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN(log2_page_size) \ + (HV_PTE_INDEX_PTFN + (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Length of the PFN field within the PTE (subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN_BITS(log2_page_size) \ + (HV_PTE_INDEX_PTFN_BITS - (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a client physical address to a pfn. */ +#define _HV_CPA_TO_PFN(p, log2_page_size) ((p) >> log2_page_size) + +/** Converts a pfn to a client physical address. */ +#define _HV_PFN_TO_CPA(p, log2_page_size) \ + (((HV_PhysAddr)(p)) << log2_page_size) + +/** Converts a ptfn to a pfn. */ +#define _HV_PTFN_TO_PFN(p, log2_page_size) \ + ((p) >> (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a pfn to a ptfn. */ +#define _HV_PFN_TO_PTFN(p, log2_page_size) \ + ((p) << (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +#endif /* _HV_HV_H */ diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h new file mode 100644 index 00000000000..ddf1604482b --- /dev/null +++ b/arch/tile/include/hv/iorpc.h @@ -0,0 +1,714 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _HV_IORPC_H_ +#define _HV_IORPC_H_ + +/** + * + * Error codes and struct definitions for the IO RPC library. + * + * The hypervisor's IO RPC component provides a convenient way for + * driver authors to proxy system calls between user space, linux, and + * the hypervisor driver. The core of the system is a set of Python + * files that take ".idl" files as input and generates the following + * source code: + * + * - _rpc_call() routines for use in userspace IO libraries. These + * routines take an argument list specified in the .idl file, pack the + * arguments in to a buffer, and read or write that buffer via the + * Linux iorpc driver. + * + * - dispatch_read() and dispatch_write() routines that hypervisor + * drivers can use to implement most of their dev_pread() and + * dev_pwrite() methods. These routines decode the incoming parameter + * blob, permission check and translate parameters where appropriate, + * and then invoke a callback routine for whichever RPC call has + * arrived. The driver simply implements the set of callback + * routines. + * + * The IO RPC system also includes the Linux 'iorpc' driver, which + * proxies calls between the userspace library and the hypervisor + * driver. The Linux driver is almost entirely device agnostic; it + * watches for special flags indicating cases where a memory buffer + * address might need to be translated, etc. As a result, driver + * writers can avoid many of the problem cases related to registering + * hardware resources like memory pages or interrupts. However, the + * drivers must be careful to obey the conventions documented below in + * order to work properly with the generic Linux iorpc driver. + * + * @section iorpc_domains Service Domains + * + * All iorpc-based drivers must support a notion of service domains. + * A service domain is basically an application context - state + * indicating resources that are allocated to that particular app + * which it may access and (perhaps) other applications may not + * access. Drivers can support any number of service domains they + * choose. In some cases the design is limited by a number of service + * domains supported by the IO hardware; in other cases the service + * domains are a purely software concept and the driver chooses a + * maximum number of domains based on how much state memory it is + * willing to preallocate. + * + * For example, the mPIPE driver only supports as many service domains + * as are supported by the mPIPE hardware. This limitation is + * required because the hardware implements its own MMIO protection + * scheme to allow large MMIO mappings while still protecting small + * register ranges within the page that should only be accessed by the + * hypervisor. + * + * In contrast, drivers with no hardware service domain limitations + * (for instance the TRIO shim) can implement an arbitrary number of + * service domains. In these cases, each service domain is limited to + * a carefully restricted set of legal MMIO addresses if necessary to + * keep one application from corrupting another application's state. + * + * @section iorpc_conventions System Call Conventions + * + * The driver's open routine is responsible for allocating a new + * service domain for each hv_dev_open() call. By convention, the + * return value from open() should be the service domain number on + * success, or GXIO_ERR_NO_SVC_DOM if no more service domains are + * available. + * + * The implementations of hv_dev_pread() and hv_dev_pwrite() are + * responsible for validating the devhdl value passed up by the + * client. Since the device handle returned by hv_dev_open() should + * embed the positive service domain number, drivers should make sure + * that DRV_HDL2BITS(devhdl) is a legal service domain. If the client + * passes an illegal service domain number, the routine should return + * GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been + * validated, the driver can copy to/from the client buffer and call + * the dispatch_read() or dispatch_write() methods created by the RPC + * generator. + * + * The hv_dev_close() implementation should reset all service domain + * state and put the service domain back on a free list for + * reallocation by a future application. In most cases, this will + * require executing a hardware reset or drain flow and denying any + * MMIO regions that were created for the service domain. + * + * @section iorpc_data Special Data Types + * + * The .idl file syntax allows the creation of syscalls with special + * parameters that require permission checks or translations as part + * of the system call path. Because of limitations in the code + * generator, APIs are generally limited to just one of these special + * parameters per system call, and they are sometimes required to be + * the first or last parameter to the call. Special parameters + * include: + * + * @subsection iorpc_mem_buffer MEM_BUFFER + * + * The MEM_BUFFER() datatype allows user space to "register" memory + * buffers with a device. Registering memory accomplishes two tasks: + * Linux keeps track of all buffers that might be modified by a + * hardware device, and the hardware device drivers bind registered + * buffers to particular hardware resources like ingress NotifRings. + * The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB, + * ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have + * certain alignment or that the user should be able to pass a "memory + * flags" word specifying attributes like nt_hint or IO cache pinning. + * The parser will accept multiple MEM_BUFFER() flags. + * + * Implementations must obey the following conventions when + * registering memory buffers via the iorpc flow. These rules are a + * result of the Linux driver implementation, which needs to keep + * track of how many times a particular page has been registered with + * the hardware so that it can release the page when all those + * registrations are cleared. + * + * - Memory registrations that refer to a resource which has already + * been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an + * error to register memory twice without resetting (i.e. closing) the + * resource in between. This convention keeps the Linux driver from + * having to track which particular devices a page is bound to. + * + * - At present, a memory registration is only cleared when the + * service domain is reset. In this case, the Linux driver simply + * closes the HV device file handle and then decrements the reference + * counts of all pages that were previously registered with the + * device. + * + * - In the future, we may add a mechanism for unregistering memory. + * One possible implementation would require that the user specify + * which buffer is currently registered. The HV would then verify + * that that page was actually the one currently mapped and return + * success or failure to Linux, which would then only decrement the + * page reference count if the addresses were mapped. Another scheme + * might allow Linux to pass a token to the HV to be returned when the + * resource is unmapped. + * + * @subsection iorpc_interrupt INTERRUPT + * + * The INTERRUPT .idl datatype allows the client to bind hardware + * interrupts to a particular combination of IPI parameters - CPU, IPI + * PL, and event bit number. This data is passed via a special + * datatype so that the Linux driver can validate the CPU and PL and + * the HV generic iorpc code can translate client CPUs to real CPUs. + * + * @subsection iorpc_pollfd_setup POLLFD_SETUP + * + * The POLLFD_SETUP .idl datatype allows the client to set up hardware + * interrupt bindings which are received by Linux but which are made + * visible to user processes as state transitions on a file descriptor; + * this allows user processes to use Linux primitives, such as poll(), to + * await particular hardware events. This data is passed via a special + * datatype so that the Linux driver may recognize the pollable file + * descriptor and translate it to a set of interrupt target information, + * and so that the HV generic iorpc code can translate client CPUs to real + * CPUs. + * + * @subsection iorpc_pollfd POLLFD + * + * The POLLFD .idl datatype allows manipulation of hardware interrupt + * bindings set up via the POLLFD_SETUP datatype; common operations are + * resetting the state of the requested interrupt events, and unbinding any + * bound interrupts. This data is passed via a special datatype so that + * the Linux driver may recognize the pollable file descriptor and + * translate it to an interrupt identifier previously supplied by the + * hypervisor as the result of an earlier pollfd_setup operation. + * + * @subsection iorpc_blob BLOB + * + * The BLOB .idl datatype allows the client to write an arbitrary + * length string of bytes up to the hypervisor driver. This can be + * useful for passing up large, arbitrarily structured data like + * classifier programs. The iorpc stack takes care of validating the + * buffer VA and CPA as the data passes up to the hypervisor. Unlike + * MEM_BUFFER(), the buffer is not registered - Linux does not bump + * page refcounts and the HV driver should not reuse the buffer once + * the system call is complete. + * + * @section iorpc_translation Translating User Space Calls + * + * The ::iorpc_offset structure describes the formatting of the offset + * that is passed to pread() or pwrite() as part of the generated RPC code. + * When the user calls up to Linux, the rpc code fills in all the fields of + * the offset, including a 16-bit opcode, a 16 bit format indicator, and 32 + * bits of user-specified "sub-offset". The opcode indicates which syscall + * is being requested. The format indicates whether there is a "prefix + * struct" at the start of the memory buffer passed to pwrite(), and if so + * what data is in that prefix struct. These prefix structs are used to + * implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange + * to put data that needs translation and permission checks at the start of + * the buffer so that the Linux driver and generic portions of the HV iorpc + * code can easily access the data. The 32 bits of user-specified + * "sub-offset" are most useful for pread() calls where the user needs to + * also pass in a few bits indicating which register to read, etc. + * + * The Linux iorpc driver watches for system calls that contain prefix + * structs so that it can translate parameters and bump reference + * counts as appropriate. It does not (currently) have any knowledge + * of the per-device opcodes - it doesn't care what operation you're + * doing to mPIPE, so long as it can do all the generic book-keeping. + * The hv/iorpc.h header file defines all of the generic encoding bits + * needed to translate iorpc calls without knowing which particular + * opcode is being issued. + * + * @section iorpc_globals Global iorpc Calls + * + * Implementing mmap() required adding some special iorpc syscalls + * that are only called by the Linux driver, never by userspace. + * These include get_mmio_base() and check_mmio_offset(). These + * routines are described in globals.idl and must be included in every + * iorpc driver. By providing these routines in every driver, Linux's + * mmap implementation can easily get the PTE bits it needs and + * validate the PA offset without needing to know the per-device + * opcodes to perform those tasks. + * + * @section iorpc_kernel Supporting gxio APIs in the Kernel + * + * The iorpc code generator also supports generation of kernel code + * implementing the gxio APIs. This capability is currently used by + * the mPIPE network driver, and will likely be used by the TRIO root + * complex and endpoint drivers and perhaps an in-kernel crypto + * driver. Each driver that wants to instantiate iorpc calls in the + * kernel needs to generate a kernel version of the generate rpc code + * and (probably) copy any related gxio source files into the kernel. + * The mPIPE driver provides a good example of this pattern. + */ + +#ifdef __KERNEL__ +#include <linux/stddef.h> +#else +#include <stddef.h> +#endif + +#if defined(__HV__) +#include <hv/hypervisor.h> +#elif defined(__KERNEL__) +#include <hv/hypervisor.h> +#include <linux/types.h> +#else +#include <stdint.h> +#endif + + +/** Code indicating translation services required within the RPC path. + * These indicate whether there is a translatable struct at the start + * of the RPC buffer and what information that struct contains. + */ +enum iorpc_format_e +{ + /** No translation required, no prefix struct. */ + IORPC_FORMAT_NONE, + + /** No translation required, no prefix struct, no access to this + * operation from user space. */ + IORPC_FORMAT_NONE_NOUSER, + + /** Prefix struct contains user VA and size. */ + IORPC_FORMAT_USER_MEM, + + /** Prefix struct contains CPA, size, and homing bits. */ + IORPC_FORMAT_KERNEL_MEM, + + /** Prefix struct contains interrupt. */ + IORPC_FORMAT_KERNEL_INTERRUPT, + + /** Prefix struct contains user-level interrupt. */ + IORPC_FORMAT_USER_INTERRUPT, + + /** Prefix struct contains pollfd_setup (interrupt information). */ + IORPC_FORMAT_KERNEL_POLLFD_SETUP, + + /** Prefix struct contains user-level pollfd_setup (file descriptor). */ + IORPC_FORMAT_USER_POLLFD_SETUP, + + /** Prefix struct contains pollfd (interrupt cookie). */ + IORPC_FORMAT_KERNEL_POLLFD, + + /** Prefix struct contains user-level pollfd (file descriptor). */ + IORPC_FORMAT_USER_POLLFD, +}; + + +/** Generate an opcode given format and code. */ +#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE)) + +/** The offset passed through the read() and write() system calls + combines an opcode with 32 bits of user-specified offset. */ +union iorpc_offset +{ +#ifndef __BIG_ENDIAN__ + uint64_t offset; /**< All bits. */ + + struct + { + uint16_t code; /**< RPC code. */ + uint16_t format; /**< iorpc_format_e */ + uint32_t sub_offset; /**< caller-specified offset. */ + }; + + uint32_t opcode; /**< Opcode combines code & format. */ +#else + uint64_t offset; /**< All bits. */ + + struct + { + uint32_t sub_offset; /**< caller-specified offset. */ + uint16_t format; /**< iorpc_format_e */ + uint16_t code; /**< RPC code. */ + }; + + struct + { + uint32_t padding; + uint32_t opcode; /**< Opcode combines code & format. */ + }; +#endif +}; + + +/** Homing and cache hinting bits that can be used by IO devices. */ +struct iorpc_mem_attr +{ + unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). */ + unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */ + unsigned int hfh:1; /**< Uses hash-for-home. */ + unsigned int nt_hint:1; /**< Non-temporal hint. */ + unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */ +}; + +/** Set the nt_hint bit. */ +#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0) + +/** Set the IO pin bit. */ +#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1) + + +/** A structure used to describe memory registration. Different + protection levels describe memory differently, so this union + contains all the different possible descriptions. As a request + moves up the call chain, each layer translates from one + description format to the next. In particular, the Linux iorpc + driver translates user VAs into CPAs and homing parameters. */ +union iorpc_mem_buffer +{ + struct + { + uint64_t va; /**< User virtual address. */ + uint64_t size; /**< Buffer size. */ + unsigned int flags; /**< nt_hint, IO pin. */ + } + user; /**< Buffer as described by user apps. */ + + struct + { + unsigned long long cpa; /**< Client physical address. */ +#if defined(__KERNEL__) || defined(__HV__) + size_t size; /**< Buffer size. */ + HV_PTE pte; /**< PTE describing memory homing. */ +#else + uint64_t size; + uint64_t pte; +#endif + unsigned int flags; /**< nt_hint, IO pin. */ + } + kernel; /**< Buffer as described by kernel. */ + + struct + { + unsigned long long pa; /**< Physical address. */ + size_t size; /**< Buffer size. */ + struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */ + } + hv; /**< Buffer parameters for HV driver. */ +}; + + +/** A structure used to describe interrupts. The format differs slightly + * for user and kernel interrupts. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_interrupt +{ + struct + { + int cpu; /**< CPU. */ + int event; /**< evt_num */ + } + user; /**< Interrupt as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< Interrupt as described by the kernel. */ + +}; + + +/** A structure used to describe interrupts used with poll(). The format + * differs significantly for requests from user to kernel, and kernel to + * hypervisor. As with the mem_buffer_t, translation between the formats + * is done at each level. */ +union iorpc_pollfd_setup +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd_setup as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< pollfd_setup as described by the kernel. */ + +}; + + +/** A structure used to describe previously set up interrupts used with + * poll(). The format differs significantly for requests from user to + * kernel, and kernel to hypervisor. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_pollfd +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd as described by user applications. */ + + struct + { + int cookie; /**< hv cookie returned by the pollfd_setup operation. */ + } + kernel; /**< pollfd as described by the kernel. */ + +}; + + +/** The various iorpc devices use error codes from -1100 to -1299. + * + * This range is distinct from netio (-700 to -799), the hypervisor + * (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099), + * gxcr (-1300 to -1399) and gxpci (-1400 to -1499). + */ +enum gxio_err_e { + + /** Largest iorpc error number. */ + GXIO_ERR_MAX = -1101, + + + /********************************************************/ + /* Generic Error Codes */ + /********************************************************/ + + /** Bad RPC opcode - possible version incompatibility. */ + GXIO_ERR_OPCODE = -1101, + + /** Invalid parameter. */ + GXIO_ERR_INVAL = -1102, + + /** Memory buffer did not meet alignment requirements. */ + GXIO_ERR_ALIGNMENT = -1103, + + /** Memory buffers must be coherent and cacheable. */ + GXIO_ERR_COHERENCE = -1104, + + /** Resource already initialized. */ + GXIO_ERR_ALREADY_INIT = -1105, + + /** No service domains available. */ + GXIO_ERR_NO_SVC_DOM = -1106, + + /** Illegal service domain number. */ + GXIO_ERR_INVAL_SVC_DOM = -1107, + + /** Illegal MMIO address. */ + GXIO_ERR_MMIO_ADDRESS = -1108, + + /** Illegal interrupt binding. */ + GXIO_ERR_INTERRUPT = -1109, + + /** Unreasonable client memory. */ + GXIO_ERR_CLIENT_MEMORY = -1110, + + /** No more IOTLB entries. */ + GXIO_ERR_IOTLB_ENTRY = -1111, + + /** Invalid memory size. */ + GXIO_ERR_INVAL_MEMORY_SIZE = -1112, + + /** Unsupported operation. */ + GXIO_ERR_UNSUPPORTED_OP = -1113, + + /** Insufficient DMA credits. */ + GXIO_ERR_DMA_CREDITS = -1114, + + /** Operation timed out. */ + GXIO_ERR_TIMEOUT = -1115, + + /** No such device or object. */ + GXIO_ERR_NO_DEVICE = -1116, + + /** Device or resource busy. */ + GXIO_ERR_BUSY = -1117, + + /** I/O error. */ + GXIO_ERR_IO = -1118, + + /** Permissions error. */ + GXIO_ERR_PERM = -1119, + + + + /********************************************************/ + /* Test Device Error Codes */ + /********************************************************/ + + /** Illegal register number. */ + GXIO_TEST_ERR_REG_NUMBER = -1120, + + /** Illegal buffer slot. */ + GXIO_TEST_ERR_BUFFER_SLOT = -1121, + + + /********************************************************/ + /* MPIPE Error Codes */ + /********************************************************/ + + + /** Invalid buffer size. */ + GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131, + + /** Cannot allocate buffer stack. */ + GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140, + + /** Invalid buffer stack number. */ + GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141, + + /** Cannot allocate NotifRing. */ + GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142, + + /** Invalid NotifRing number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143, + + /** Cannot allocate NotifGroup. */ + GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144, + + /** Invalid NotifGroup number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145, + + /** Cannot allocate bucket. */ + GXIO_MPIPE_ERR_NO_BUCKET = -1146, + + /** Invalid bucket number. */ + GXIO_MPIPE_ERR_BAD_BUCKET = -1147, + + /** Cannot allocate eDMA ring. */ + GXIO_MPIPE_ERR_NO_EDMA_RING = -1148, + + /** Invalid eDMA ring number. */ + GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149, + + /** Invalid channel number. */ + GXIO_MPIPE_ERR_BAD_CHANNEL = -1150, + + /** Bad configuration. */ + GXIO_MPIPE_ERR_BAD_CONFIG = -1151, + + /** Empty iqueue. */ + GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152, + + /** Empty rules. */ + GXIO_MPIPE_ERR_RULES_EMPTY = -1160, + + /** Full rules. */ + GXIO_MPIPE_ERR_RULES_FULL = -1161, + + /** Corrupt rules. */ + GXIO_MPIPE_ERR_RULES_CORRUPT = -1162, + + /** Invalid rules. */ + GXIO_MPIPE_ERR_RULES_INVALID = -1163, + + /** Classifier is too big. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170, + + /** Classifier is too complex. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171, + + /** Classifier has bad header. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172, + + /** Classifier has bad contents. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173, + + /** Classifier encountered invalid symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174, + + /** Classifier encountered invalid bounds. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175, + + /** Classifier encountered invalid relocation. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176, + + /** Classifier encountered undefined symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177, + + + /********************************************************/ + /* TRIO Error Codes */ + /********************************************************/ + + /** Cannot allocate memory map region. */ + GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180, + + /** Invalid memory map region number. */ + GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181, + + /** Cannot allocate scatter queue. */ + GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182, + + /** Invalid scatter queue number. */ + GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183, + + /** Cannot allocate push DMA ring. */ + GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184, + + /** Invalid push DMA ring index. */ + GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185, + + /** Cannot allocate pull DMA ring. */ + GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186, + + /** Invalid pull DMA ring index. */ + GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187, + + /** Cannot allocate PIO region. */ + GXIO_TRIO_ERR_NO_PIO = -1188, + + /** Invalid PIO region index. */ + GXIO_TRIO_ERR_BAD_PIO = -1189, + + /** Cannot allocate ASID. */ + GXIO_TRIO_ERR_NO_ASID = -1190, + + /** Invalid ASID. */ + GXIO_TRIO_ERR_BAD_ASID = -1191, + + + /********************************************************/ + /* MICA Error Codes */ + /********************************************************/ + + /** No such accelerator type. */ + GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220, + + /** Cannot allocate context. */ + GXIO_MICA_ERR_NO_CONTEXT = -1221, + + /** PKA command queue is full, can't add another command. */ + GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222, + + /** PKA result queue is empty, can't get a result from the queue. */ + GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223, + + /********************************************************/ + /* GPIO Error Codes */ + /********************************************************/ + + /** Pin not available. Either the physical pin does not exist, or + * it is reserved by the hypervisor for system usage. */ + GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240, + + /** Pin busy. The pin exists, and is available for use via GXIO, but + * it has been attached by some other process or driver. */ + GXIO_GPIO_ERR_PIN_BUSY = -1241, + + /** Cannot access unattached pin. One or more of the pins being + * manipulated by this call are not attached to the requesting + * context. */ + GXIO_GPIO_ERR_PIN_UNATTACHED = -1242, + + /** Invalid I/O mode for pin. The wiring of the pin in the system + * is such that the I/O mode or electrical control parameters + * requested could cause damage. */ + GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243, + + /** Smallest iorpc error number. */ + GXIO_ERR_MIN = -1299 +}; + + +#endif /* !_HV_IORPC_H_ */ diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild new file mode 100644 index 00000000000..97dfbecec6b --- /dev/null +++ b/arch/tile/include/uapi/arch/Kbuild @@ -0,0 +1,17 @@ +# UAPI Header export list +header-y += abi.h +header-y += chip.h +header-y += chip_tilegx.h +header-y += chip_tilepro.h +header-y += icache.h +header-y += interrupts.h +header-y += interrupts_32.h +header-y += interrupts_64.h +header-y += opcode.h +header-y += opcode_tilegx.h +header-y += opcode_tilepro.h +header-y += sim.h +header-y += sim_def.h +header-y += spr_def.h +header-y += spr_def_32.h +header-y += spr_def_64.h diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/uapi/arch/abi.h index c55a3d43264..c55a3d43264 100644 --- a/arch/tile/include/arch/abi.h +++ b/arch/tile/include/uapi/arch/abi.h diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/uapi/arch/chip.h index 926d3db0e91..4c91f90b936 100644 --- a/arch/tile/include/arch/chip.h +++ b/arch/tile/include/uapi/arch/chip.h @@ -12,9 +12,7 @@ * more details. */ -#if __tile_chip__ == 0 -#include <arch/chip_tile64.h> -#elif __tile_chip__ == 1 +#if __tile_chip__ == 1 #include <arch/chip_tilepro.h> #elif defined(__tilegx__) #include <arch/chip_tilegx.h> diff --git a/arch/tile/include/arch/chip_tilegx.h b/arch/tile/include/uapi/arch/chip_tilegx.h index ea8e4f2c948..ea8e4f2c948 100644 --- a/arch/tile/include/arch/chip_tilegx.h +++ b/arch/tile/include/uapi/arch/chip_tilegx.h diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/uapi/arch/chip_tilepro.h index 70017699a74..70017699a74 100644 --- a/arch/tile/include/arch/chip_tilepro.h +++ b/arch/tile/include/uapi/arch/chip_tilepro.h diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/uapi/arch/icache.h index 762eafa8a11..762eafa8a11 100644 --- a/arch/tile/include/arch/icache.h +++ b/arch/tile/include/uapi/arch/icache.h diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/uapi/arch/interrupts.h index 20f8f07d2de..20f8f07d2de 100644 --- a/arch/tile/include/arch/interrupts.h +++ b/arch/tile/include/uapi/arch/interrupts.h diff --git a/arch/tile/include/uapi/arch/interrupts_32.h b/arch/tile/include/uapi/arch/interrupts_32.h new file mode 100644 index 00000000000..2efe3f68b2d --- /dev/null +++ b/arch/tile/include/uapi/arch/interrupts_32.h @@ -0,0 +1,309 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +#ifndef __KERNEL__ +/** Mask for an interrupt. */ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK_LO(intno) (1 << (intno)) +#define INT_MASK_HI(intno) (1 << ((intno) - 32)) + +#ifndef __ASSEMBLER__ +#define INT_MASK(intno) (1ULL << (intno)) +#endif +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. */ +#define INT_BREAKPOINT (63) + +#define INT_ITLB_MISS 0 +#define INT_MEM_ERROR 1 +#define INT_ILL 2 +#define INT_GPV 3 +#define INT_SN_ACCESS 4 +#define INT_IDN_ACCESS 5 +#define INT_UDN_ACCESS 6 +#define INT_IDN_REFILL 7 +#define INT_UDN_REFILL 8 +#define INT_IDN_COMPLETE 9 +#define INT_UDN_COMPLETE 10 +#define INT_SWINT_3 11 +#define INT_SWINT_2 12 +#define INT_SWINT_1 13 +#define INT_SWINT_0 14 +#define INT_UNALIGN_DATA 15 +#define INT_DTLB_MISS 16 +#define INT_DTLB_ACCESS 17 +#define INT_DMATLB_MISS 18 +#define INT_DMATLB_ACCESS 19 +#define INT_SNITLB_MISS 20 +#define INT_SN_NOTIFY 21 +#define INT_SN_FIREWALL 22 +#define INT_IDN_FIREWALL 23 +#define INT_UDN_FIREWALL 24 +#define INT_TILE_TIMER 25 +#define INT_IDN_TIMER 26 +#define INT_UDN_TIMER 27 +#define INT_DMA_NOTIFY 28 +#define INT_IDN_CA 29 +#define INT_UDN_CA 30 +#define INT_IDN_AVAIL 31 +#define INT_UDN_AVAIL 32 +#define INT_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DMA_ASID 42 +#define INT_SNI_ASID 43 +#define INT_DMA_CPL 44 +#define INT_SN_CPL 45 +#define INT_DOUBLE_FAULT 46 +#define INT_SN_STATIC_ACCESS 47 +#define INT_AUX_PERF_COUNT 48 + +#define NUM_INTERRUPTS 49 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define MASKABLE_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define SYNC_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define NON_SYNC_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/uapi/arch/interrupts_64.h b/arch/tile/include/uapi/arch/interrupts_64.h new file mode 100644 index 00000000000..13c9f918234 --- /dev/null +++ b/arch/tile/include/uapi/arch/interrupts_64.h @@ -0,0 +1,278 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +#ifndef __KERNEL__ +/** Mask for an interrupt. */ +#ifdef __ASSEMBLER__ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK(intno) (1 << (intno)) +#else +#define INT_MASK(intno) (1ULL << (intno)) +#endif +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. */ +#define INT_BREAKPOINT (63) + +#define INT_MEM_ERROR 0 +#define INT_SINGLE_STEP_3 1 +#define INT_SINGLE_STEP_2 2 +#define INT_SINGLE_STEP_1 3 +#define INT_SINGLE_STEP_0 4 +#define INT_IDN_COMPLETE 5 +#define INT_UDN_COMPLETE 6 +#define INT_ITLB_MISS 7 +#define INT_ILL 8 +#define INT_GPV 9 +#define INT_IDN_ACCESS 10 +#define INT_UDN_ACCESS 11 +#define INT_SWINT_3 12 +#define INT_SWINT_2 13 +#define INT_SWINT_1 14 +#define INT_SWINT_0 15 +#define INT_ILL_TRANS 16 +#define INT_UNALIGN_DATA 17 +#define INT_DTLB_MISS 18 +#define INT_DTLB_ACCESS 19 +#define INT_IDN_FIREWALL 20 +#define INT_UDN_FIREWALL 21 +#define INT_TILE_TIMER 22 +#define INT_AUX_TILE_TIMER 23 +#define INT_IDN_TIMER 24 +#define INT_UDN_TIMER 25 +#define INT_IDN_AVAIL 26 +#define INT_UDN_AVAIL 27 +#define INT_IPI_3 28 +#define INT_IPI_2 29 +#define INT_IPI_1 30 +#define INT_IPI_0 31 +#define INT_PERF_COUNT 32 +#define INT_AUX_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DOUBLE_FAULT 42 + +#define NUM_INTERRUPTS 43 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define MASKABLE_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define SYNC_INTERRUPTS ( \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + 0) +#define NON_SYNC_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/opcode.h b/arch/tile/include/uapi/arch/opcode.h index 92d15229ece..92d15229ece 100644 --- a/arch/tile/include/arch/opcode.h +++ b/arch/tile/include/uapi/arch/opcode.h diff --git a/arch/tile/include/arch/opcode_tilegx.h b/arch/tile/include/uapi/arch/opcode_tilegx.h index c14d02c8160..d76ff2db745 100644 --- a/arch/tile/include/arch/opcode_tilegx.h +++ b/arch/tile/include/uapi/arch/opcode_tilegx.h @@ -61,6 +61,7 @@ typedef tilegx_bundle_bits tile_bundle_bits; #define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES #define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_BPT_BUNDLE TILEGX_BPT_BUNDLE /* 64-bit pattern for a { bpt ; nop } bundle. */ #define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL diff --git a/arch/tile/include/arch/opcode_tilepro.h b/arch/tile/include/uapi/arch/opcode_tilepro.h index 71b763b8ce8..4451cff1a86 100644 --- a/arch/tile/include/arch/opcode_tilepro.h +++ b/arch/tile/include/uapi/arch/opcode_tilepro.h @@ -71,6 +71,7 @@ typedef tilepro_bundle_bits tile_bundle_bits; #define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES #define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_BPT_BUNDLE TILEPRO_BPT_BUNDLE /* 64-bit pattern for a { bpt ; nop } bundle. */ #define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/uapi/arch/sim.h index e54b7b0527f..e54b7b0527f 100644 --- a/arch/tile/include/arch/sim.h +++ b/arch/tile/include/uapi/arch/sim.h diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/uapi/arch/sim_def.h index 4b44a2b6a09..4b44a2b6a09 100644 --- a/arch/tile/include/arch/sim_def.h +++ b/arch/tile/include/uapi/arch/sim_def.h diff --git a/arch/tile/include/uapi/arch/spr_def.h b/arch/tile/include/uapi/arch/spr_def.h new file mode 100644 index 00000000000..c250c5adb1a --- /dev/null +++ b/arch/tile/include/uapi/arch/spr_def.h @@ -0,0 +1,26 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _UAPI__ARCH_SPR_DEF_H__ +#define _UAPI__ARCH_SPR_DEF_H__ + +/* Include the proper base SPR definition file. */ +#ifdef __tilegx__ +#include <arch/spr_def_64.h> +#else +#include <arch/spr_def_32.h> +#endif + + +#endif /* _UAPI__ARCH_SPR_DEF_H__ */ diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h index bbc1f4c924e..78daa3146d2 100644 --- a/arch/tile/include/arch/spr_def_32.h +++ b/arch/tile/include/uapi/arch/spr_def_32.h @@ -14,8 +14,8 @@ #ifndef __DOXYGEN__ -#ifndef __ARCH_SPR_DEF_H__ -#define __ARCH_SPR_DEF_H__ +#ifndef __ARCH_SPR_DEF_32_H__ +#define __ARCH_SPR_DEF_32_H__ #define SPR_AUX_PERF_COUNT_0 0x6005 #define SPR_AUX_PERF_COUNT_1 0x6006 @@ -65,6 +65,31 @@ #define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 #define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 #define SPR_FAIL 0x4e09 +#define SPR_IDN_AVAIL_EN 0x3e05 +#define SPR_IDN_CA_DATA 0x0b00 +#define SPR_IDN_DATA_AVAIL 0x0b03 +#define SPR_IDN_DEADLOCK_TIMEOUT 0x3406 +#define SPR_IDN_DEMUX_CA_COUNT 0x0a05 +#define SPR_IDN_DEMUX_COUNT_0 0x0a06 +#define SPR_IDN_DEMUX_COUNT_1 0x0a07 +#define SPR_IDN_DEMUX_CTL 0x0a08 +#define SPR_IDN_DEMUX_QUEUE_SEL 0x0a0a +#define SPR_IDN_DEMUX_STATUS 0x0a0b +#define SPR_IDN_DEMUX_WRITE_FIFO 0x0a0c +#define SPR_IDN_DIRECTION_PROTECT 0x2e05 +#define SPR_IDN_PENDING 0x0a0e +#define SPR_IDN_REFILL_EN 0x0e05 +#define SPR_IDN_SP_FIFO_DATA 0x0a0f +#define SPR_IDN_SP_FIFO_SEL 0x0a10 +#define SPR_IDN_SP_FREEZE 0x0a11 +#define SPR_IDN_SP_FREEZE__SP_FRZ_MASK 0x1 +#define SPR_IDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2 +#define SPR_IDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4 +#define SPR_IDN_SP_STATE 0x0a12 +#define SPR_IDN_TAG_0 0x0a13 +#define SPR_IDN_TAG_1 0x0a14 +#define SPR_IDN_TAG_VALID 0x0a15 +#define SPR_IDN_TILE_COORD 0x0a16 #define SPR_INTCTRL_0_STATUS 0x4a07 #define SPR_INTCTRL_1_STATUS 0x4807 #define SPR_INTCTRL_2_STATUS 0x4607 @@ -87,12 +112,36 @@ #define SPR_INTERRUPT_MASK_SET_1_1 0x480e #define SPR_INTERRUPT_MASK_SET_2_0 0x460c #define SPR_INTERRUPT_MASK_SET_2_1 0x460d +#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x6000 +#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x6001 +#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x6002 #define SPR_MPL_DMA_CPL_SET_0 0x5800 #define SPR_MPL_DMA_CPL_SET_1 0x5801 #define SPR_MPL_DMA_CPL_SET_2 0x5802 #define SPR_MPL_DMA_NOTIFY_SET_0 0x3800 #define SPR_MPL_DMA_NOTIFY_SET_1 0x3801 #define SPR_MPL_DMA_NOTIFY_SET_2 0x3802 +#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00 +#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01 +#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02 +#define SPR_MPL_IDN_AVAIL_SET_0 0x3e00 +#define SPR_MPL_IDN_AVAIL_SET_1 0x3e01 +#define SPR_MPL_IDN_AVAIL_SET_2 0x3e02 +#define SPR_MPL_IDN_CA_SET_0 0x3a00 +#define SPR_MPL_IDN_CA_SET_1 0x3a01 +#define SPR_MPL_IDN_CA_SET_2 0x3a02 +#define SPR_MPL_IDN_COMPLETE_SET_0 0x1200 +#define SPR_MPL_IDN_COMPLETE_SET_1 0x1201 +#define SPR_MPL_IDN_COMPLETE_SET_2 0x1202 +#define SPR_MPL_IDN_FIREWALL_SET_0 0x2e00 +#define SPR_MPL_IDN_FIREWALL_SET_1 0x2e01 +#define SPR_MPL_IDN_FIREWALL_SET_2 0x2e02 +#define SPR_MPL_IDN_REFILL_SET_0 0x0e00 +#define SPR_MPL_IDN_REFILL_SET_1 0x0e01 +#define SPR_MPL_IDN_REFILL_SET_2 0x0e02 +#define SPR_MPL_IDN_TIMER_SET_0 0x3400 +#define SPR_MPL_IDN_TIMER_SET_1 0x3401 +#define SPR_MPL_IDN_TIMER_SET_2 0x3402 #define SPR_MPL_INTCTRL_0_SET_0 0x4a00 #define SPR_MPL_INTCTRL_0_SET_1 0x4a01 #define SPR_MPL_INTCTRL_0_SET_2 0x4a02 @@ -102,6 +151,9 @@ #define SPR_MPL_INTCTRL_2_SET_0 0x4600 #define SPR_MPL_INTCTRL_2_SET_1 0x4601 #define SPR_MPL_INTCTRL_2_SET_2 0x4602 +#define SPR_MPL_PERF_COUNT_SET_0 0x4200 +#define SPR_MPL_PERF_COUNT_SET_1 0x4201 +#define SPR_MPL_PERF_COUNT_SET_2 0x4202 #define SPR_MPL_SN_ACCESS_SET_0 0x0800 #define SPR_MPL_SN_ACCESS_SET_1 0x0801 #define SPR_MPL_SN_ACCESS_SET_2 0x0802 @@ -148,8 +200,6 @@ #define SPR_SIM_CONTROL 0x4e0c #define SPR_SNCTL 0x0805 #define SPR_SNCTL__FRZFABRIC_MASK 0x1 -#define SPR_SNCTL__FRZPROC_MASK 0x2 -#define SPR_SNPC 0x080b #define SPR_SNSTATIC 0x080c #define SPR_SYSTEM_SAVE_0_0 0x4b00 #define SPR_SYSTEM_SAVE_0_1 0x4b01 @@ -181,6 +231,7 @@ #define SPR_UDN_DEMUX_STATUS 0x0c0d #define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e #define SPR_UDN_DIRECTION_PROTECT 0x3005 +#define SPR_UDN_PENDING 0x0c10 #define SPR_UDN_REFILL_EN 0x1005 #define SPR_UDN_SP_FIFO_DATA 0x0c11 #define SPR_UDN_SP_FIFO_SEL 0x0c12 @@ -195,7 +246,10 @@ #define SPR_UDN_TAG_3 0x0c18 #define SPR_UDN_TAG_VALID 0x0c19 #define SPR_UDN_TILE_COORD 0x0c1a +#define SPR_WATCH_CTL 0x4209 +#define SPR_WATCH_MASK 0x420a +#define SPR_WATCH_VAL 0x420b -#endif /* !defined(__ARCH_SPR_DEF_H__) */ +#endif /* !defined(__ARCH_SPR_DEF_32_H__) */ #endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/uapi/arch/spr_def_64.h index cd3e5f95d5f..67a6c1751e3 100644 --- a/arch/tile/include/arch/spr_def_64.h +++ b/arch/tile/include/uapi/arch/spr_def_64.h @@ -14,8 +14,8 @@ #ifndef __DOXYGEN__ -#ifndef __ARCH_SPR_DEF_H__ -#define __ARCH_SPR_DEF_H__ +#ifndef __ARCH_SPR_DEF_64_H__ +#define __ARCH_SPR_DEF_64_H__ #define SPR_AUX_PERF_COUNT_0 0x2105 #define SPR_AUX_PERF_COUNT_1 0x2106 @@ -52,6 +52,13 @@ #define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 #define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 #define SPR_FAIL 0x2707 +#define SPR_IDN_AVAIL_EN 0x1a05 +#define SPR_IDN_DATA_AVAIL 0x0a80 +#define SPR_IDN_DEADLOCK_TIMEOUT 0x1806 +#define SPR_IDN_DEMUX_COUNT_0 0x0a05 +#define SPR_IDN_DEMUX_COUNT_1 0x0a06 +#define SPR_IDN_DIRECTION_PROTECT 0x1405 +#define SPR_IDN_PENDING 0x0a08 #define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1 #define SPR_INTCTRL_0_STATUS 0x2505 #define SPR_INTCTRL_1_STATUS 0x2405 @@ -88,9 +95,27 @@ #define SPR_IPI_MASK_SET_0 0x1f0a #define SPR_IPI_MASK_SET_1 0x1e0a #define SPR_IPI_MASK_SET_2 0x1d0a +#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x2100 +#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x2101 +#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x2102 #define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700 #define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701 #define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702 +#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00 +#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01 +#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02 +#define SPR_MPL_IDN_AVAIL_SET_0 0x1a00 +#define SPR_MPL_IDN_AVAIL_SET_1 0x1a01 +#define SPR_MPL_IDN_AVAIL_SET_2 0x1a02 +#define SPR_MPL_IDN_COMPLETE_SET_0 0x0500 +#define SPR_MPL_IDN_COMPLETE_SET_1 0x0501 +#define SPR_MPL_IDN_COMPLETE_SET_2 0x0502 +#define SPR_MPL_IDN_FIREWALL_SET_0 0x1400 +#define SPR_MPL_IDN_FIREWALL_SET_1 0x1401 +#define SPR_MPL_IDN_FIREWALL_SET_2 0x1402 +#define SPR_MPL_IDN_TIMER_SET_0 0x1800 +#define SPR_MPL_IDN_TIMER_SET_1 0x1801 +#define SPR_MPL_IDN_TIMER_SET_2 0x1802 #define SPR_MPL_INTCTRL_0_SET_0 0x2500 #define SPR_MPL_INTCTRL_0_SET_1 0x2501 #define SPR_MPL_INTCTRL_0_SET_2 0x2502 @@ -100,6 +125,21 @@ #define SPR_MPL_INTCTRL_2_SET_0 0x2300 #define SPR_MPL_INTCTRL_2_SET_1 0x2301 #define SPR_MPL_INTCTRL_2_SET_2 0x2302 +#define SPR_MPL_IPI_0 0x1f04 +#define SPR_MPL_IPI_0_SET_0 0x1f00 +#define SPR_MPL_IPI_0_SET_1 0x1f01 +#define SPR_MPL_IPI_0_SET_2 0x1f02 +#define SPR_MPL_IPI_1 0x1e04 +#define SPR_MPL_IPI_1_SET_0 0x1e00 +#define SPR_MPL_IPI_1_SET_1 0x1e01 +#define SPR_MPL_IPI_1_SET_2 0x1e02 +#define SPR_MPL_IPI_2 0x1d04 +#define SPR_MPL_IPI_2_SET_0 0x1d00 +#define SPR_MPL_IPI_2_SET_1 0x1d01 +#define SPR_MPL_IPI_2_SET_2 0x1d02 +#define SPR_MPL_PERF_COUNT_SET_0 0x2000 +#define SPR_MPL_PERF_COUNT_SET_1 0x2001 +#define SPR_MPL_PERF_COUNT_SET_2 0x2002 #define SPR_MPL_UDN_ACCESS_SET_0 0x0b00 #define SPR_MPL_UDN_ACCESS_SET_1 0x0b01 #define SPR_MPL_UDN_ACCESS_SET_2 0x0b02 @@ -167,7 +207,10 @@ #define SPR_UDN_DEMUX_COUNT_2 0x0b07 #define SPR_UDN_DEMUX_COUNT_3 0x0b08 #define SPR_UDN_DIRECTION_PROTECT 0x1505 +#define SPR_UDN_PENDING 0x0b0a +#define SPR_WATCH_MASK 0x200a +#define SPR_WATCH_VAL 0x200b -#endif /* !defined(__ARCH_SPR_DEF_H__) */ +#endif /* !defined(__ARCH_SPR_DEF_64_H__) */ #endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild new file mode 100644 index 00000000000..c20db8e428b --- /dev/null +++ b/arch/tile/include/uapi/asm/Kbuild @@ -0,0 +1,21 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += cachectl.h +header-y += hardwall.h +header-y += kvm_para.h +header-y += mman.h +header-y += ptrace.h +header-y += setup.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += stat.h +header-y += swab.h +header-y += ucontext.h +header-y += unistd.h + +generic-y += ucontext.h diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h index 1d393edb064..c93e92709f1 100644 --- a/arch/tile/include/asm/auxvec.h +++ b/arch/tile/include/uapi/asm/auxvec.h @@ -15,6 +15,7 @@ #ifndef _ASM_TILE_AUXVEC_H #define _ASM_TILE_AUXVEC_H -/* No extensions to auxvec */ +/* The vDSO location. */ +#define AT_SYSINFO_EHDR 33 #endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/uapi/asm/bitsperlong.h index 58c771f2af2..58c771f2af2 100644 --- a/arch/tile/include/asm/bitsperlong.h +++ b/arch/tile/include/uapi/asm/bitsperlong.h diff --git a/arch/tile/include/uapi/asm/byteorder.h b/arch/tile/include/uapi/asm/byteorder.h new file mode 100644 index 00000000000..fb72ecf4921 --- /dev/null +++ b/arch/tile/include/uapi/asm/byteorder.h @@ -0,0 +1,21 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if defined (__BIG_ENDIAN__) +#include <linux/byteorder/big_endian.h> +#elif defined (__LITTLE_ENDIAN__) +#include <linux/byteorder/little_endian.h> +#else +#error "__BIG_ENDIAN__ or __LITTLE_ENDIAN__ must be defined." +#endif diff --git a/arch/tile/include/uapi/asm/cachectl.h b/arch/tile/include/uapi/asm/cachectl.h new file mode 100644 index 00000000000..572ddcad209 --- /dev/null +++ b/arch/tile/include/uapi/asm/cachectl.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CACHECTL_H +#define _ASM_TILE_CACHECTL_H + +/* + * Options for cacheflush system call. + * + * The ICACHE flush is performed on all cores currently running the + * current process's address space. The intent is for user + * applications to be able to modify code, invoke the system call, + * then allow arbitrary other threads in the same address space to see + * the newly-modified code. Passing a length of CHIP_L1I_CACHE_SIZE() + * or more invalidates the entire icache on all cores in the address + * spaces. (Note: currently this option invalidates the entire icache + * regardless of the requested address and length, but we may choose + * to honor the arguments at some point.) + * + * Flush and invalidation of memory can normally be performed with the + * __insn_flush() and __insn_finv() instructions from userspace. + * The DCACHE option to the system call allows userspace + * to flush the entire L1+L2 data cache from the core. In this case, + * the address and length arguments are not used. The DCACHE flush is + * restricted to the current core, not all cores in the address space. + */ +#define ICACHE (1<<0) /* invalidate L1 instruction cache */ +#define DCACHE (1<<1) /* flush and invalidate data cache */ +#define BCACHE (ICACHE|DCACHE) /* flush both caches */ + +#endif /* _ASM_TILE_CACHECTL_H */ diff --git a/arch/tile/include/uapi/asm/hardwall.h b/arch/tile/include/uapi/asm/hardwall.h new file mode 100644 index 00000000000..c2169d4f401 --- /dev/null +++ b/arch/tile/include/uapi/asm/hardwall.h @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Provide methods for access control of per-cpu resources like + * UDN, IDN, or IPI. + */ + +#ifndef _UAPI_ASM_TILE_HARDWALL_H +#define _UAPI_ASM_TILE_HARDWALL_H + +#include <arch/chip.h> +#include <linux/ioctl.h> + +#define HARDWALL_IOCTL_BASE 0xa2 + +/* + * The HARDWALL_CREATE() ioctl is a macro with a "size" argument. + * The resulting ioctl value is passed to the kernel in conjunction + * with a pointer to a standard kernel bitmask of cpus. + * For network resources (UDN or IDN) the bitmask must physically + * represent a rectangular configuration on the chip. + * The "size" is the number of bytes of cpu mask data. + */ +#define _HARDWALL_CREATE 1 +#define HARDWALL_CREATE(size) \ + _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size)) + +#define _HARDWALL_ACTIVATE 2 +#define HARDWALL_ACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE) + +#define _HARDWALL_DEACTIVATE 3 +#define HARDWALL_DEACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) + +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + + +#endif /* _UAPI_ASM_TILE_HARDWALL_H */ diff --git a/arch/tile/include/uapi/asm/kvm_para.h b/arch/tile/include/uapi/asm/kvm_para.h new file mode 100644 index 00000000000..14fab8f0b95 --- /dev/null +++ b/arch/tile/include/uapi/asm/kvm_para.h @@ -0,0 +1 @@ +#include <asm-generic/kvm_para.h> diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/uapi/asm/mman.h index 81b8fc348d6..81b8fc348d6 100644 --- a/arch/tile/include/asm/mman.h +++ b/arch/tile/include/uapi/asm/mman.h diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h new file mode 100644 index 00000000000..7757e1985fb --- /dev/null +++ b/arch/tile/include/uapi/asm/ptrace.h @@ -0,0 +1,94 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _UAPI_ASM_TILE_PTRACE_H +#define _UAPI_ASM_TILE_PTRACE_H + +#include <arch/chip.h> +#include <arch/abi.h> + +/* These must match struct pt_regs, below. */ +#if CHIP_WORD_SIZE() == 32 +#define PTREGS_OFFSET_REG(n) ((n)*4) +#else +#define PTREGS_OFFSET_REG(n) ((n)*8) +#endif +#define PTREGS_OFFSET_BASE 0 +#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53) +#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54) +#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55) +#define PTREGS_NR_GPRS 56 +#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56) +#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57) +#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58) +#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59) +#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60) +#if CHIP_HAS_CMPEXCH() +#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61) +#endif +#define PTREGS_SIZE PTREGS_OFFSET_REG(64) + + +#ifndef __ASSEMBLY__ + +#ifndef __KERNEL__ +/* Provide appropriate length type to userspace regardless of -m32/-m64. */ +typedef uint_reg_t pt_reg_t; +#endif + +/* + * This struct defines the way the registers are stored on the stack during a + * system call or exception. "struct sigcontext" has the same shape. + */ +struct pt_regs { + /* Saved main processor registers; 56..63 are special. */ + /* tp, sp, and lr must immediately follow regs[] for aliasing. */ + pt_reg_t regs[53]; + pt_reg_t tp; /* aliases regs[TREG_TP] */ + pt_reg_t sp; /* aliases regs[TREG_SP] */ + pt_reg_t lr; /* aliases regs[TREG_LR] */ + + /* Saved special registers. */ + pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ + pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ + pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ + pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ + pt_reg_t flags; /* flags (see below) */ +#if !CHIP_HAS_CMPEXCH() + pt_reg_t pad[3]; +#else + pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */ + pt_reg_t pad[2]; +#endif +}; + +#endif /* __ASSEMBLY__ */ + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 + +/* Support TILE-specific ptrace options, with events starting at 16. */ +#define PTRACE_EVENT_MIGRATE 16 +#define PTRACE_O_TRACEMIGRATE (1 << PTRACE_EVENT_MIGRATE) + +/* + * Flag bits in pt_regs.flags that are part of the ptrace API. + * We start our numbering higher up to avoid confusion with the + * non-ABI kernel-internal values that use the low 16 bits. + */ +#define PT_FLAGS_COMPAT 0x10000 /* process is an -m32 compat process */ + +#endif /* _UAPI_ASM_TILE_PTRACE_H */ diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/uapi/asm/setup.h index 4fac5fbf333..e6f7da265ac 100644 --- a/arch/tile/include/asm/hw_irq.h +++ b/arch/tile/include/uapi/asm/setup.h @@ -12,7 +12,10 @@ * more details. */ -#ifndef _ASM_TILE_HW_IRQ_H -#define _ASM_TILE_HW_IRQ_H +#ifndef _UAPI_ASM_TILE_SETUP_H +#define _UAPI_ASM_TILE_SETUP_H -#endif /* _ASM_TILE_HW_IRQ_H */ +#define COMMAND_LINE_SIZE 2048 + + +#endif /* _UAPI_ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/uapi/asm/sigcontext.h index 6348e59d372..6348e59d372 100644 --- a/arch/tile/include/asm/sigcontext.h +++ b/arch/tile/include/uapi/asm/sigcontext.h diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/uapi/asm/siginfo.h index 56d661bb010..56d661bb010 100644 --- a/arch/tile/include/asm/siginfo.h +++ b/arch/tile/include/uapi/asm/siginfo.h diff --git a/arch/tile/include/uapi/asm/signal.h b/arch/tile/include/uapi/asm/signal.h new file mode 100644 index 00000000000..ef0d32d84a4 --- /dev/null +++ b/arch/tile/include/uapi/asm/signal.h @@ -0,0 +1,27 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _UAPI_ASM_TILE_SIGNAL_H +#define _UAPI_ASM_TILE_SIGNAL_H + +/* Do not notify a ptracer when this signal is handled. */ +#define SA_NOPTRACE 0x02000000u + +/* Used in earlier Tilera releases, so keeping for binary compatibility. */ +#define SA_RESTORER 0x04000000u + +#include <asm-generic/signal.h> + + +#endif /* _UAPI_ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/uapi/asm/stat.h index c0db34d56be..c0db34d56be 100644 --- a/arch/tile/include/asm/stat.h +++ b/arch/tile/include/uapi/asm/stat.h diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/uapi/asm/swab.h index 7c37b38f6c8..7c37b38f6c8 100644 --- a/arch/tile/include/asm/swab.h +++ b/arch/tile/include/uapi/asm/swab.h diff --git a/arch/tile/include/uapi/asm/unistd.h b/arch/tile/include/uapi/asm/unistd.h new file mode 100644 index 00000000000..3866397aaf5 --- /dev/null +++ b/arch/tile/include/uapi/asm/unistd.h @@ -0,0 +1,36 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if !defined(__LP64__) || defined(__SYSCALL_COMPAT) +/* Use the flavor of this syscall that matches the 32-bit API better. */ +#define __ARCH_WANT_SYNC_FILE_RANGE2 +#endif + +/* Use the standard ABI for syscalls. */ +#include <asm-generic/unistd.h> + +#define NR_syscalls __NR_syscalls + +/* Additional Tilera-specific syscalls. */ +#define __NR_cacheflush (__NR_arch_specific_syscall + 1) +__SYSCALL(__NR_cacheflush, sys_cacheflush) + +#ifndef __tilegx__ +/* "Fast" syscalls provide atomic support for 32-bit chips. */ +#define __NR_FAST_cmpxchg -1 +#define __NR_FAST_atomic_update -2 +#define __NR_FAST_cmpxchg64 -3 +#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0) +__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) +#endif |
