diff options
Diffstat (limited to 'arch/tile')
304 files changed, 36875 insertions, 11199 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index e32b0c23c4c..4f3006b600e 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -3,15 +3,30 @@ config TILE def_bool y + select HAVE_PERF_EVENTS + select USE_PMC if PERF_EVENTS + select HAVE_DMA_ATTRS + select HAVE_DMA_API_DEBUG select HAVE_KVM if !TILEGX select GENERIC_FIND_FIRST_BIT - select GENERIC_FIND_NEXT_BIT - select USE_GENERIC_SMP_HELPERS + select SYSCTL_EXCEPTION_TRACE select CC_OPTIMIZE_FOR_SIZE - select HAVE_GENERIC_HARDIRQS + select HAVE_DEBUG_KMEMLEAK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select HAVE_DEBUG_BUGVERBOSE + select VIRT_TO_BUS + select SYS_HYPERVISOR + select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select GENERIC_CLOCKEVENTS + select MODULES_USE_ELF_RELA + select HAVE_ARCH_TRACEHOOK + select HAVE_SYSCALL_TRACEPOINTS + select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select HAVE_DEBUG_STACKOVERFLOW + select ARCH_WANT_FRAME_POINTERS # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT @@ -30,9 +45,6 @@ config MMU config GENERIC_CSUM def_bool y -config SEMAPHORE_SLEEPERS - def_bool y - config HAVE_ARCH_ALLOC_REMAP def_bool y @@ -45,22 +57,25 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config SYS_SUPPORTS_HUGETLBFS def_bool y -config GENERIC_TIME +# Support for additional huge page sizes besides HPAGE_SIZE. +# The software support is currently only present in the TILE-Gx +# hypervisor. TILEPro in any case does not support page sizes +# larger than the default HPAGE_SIZE. +config HUGETLB_SUPER_PAGES + depends on HUGETLB_PAGE && TILEGX def_bool y -config GENERIC_CLOCKEVENTS +config GENERIC_TIME_VSYSCALL def_bool y +# Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled. +config USE_PMC + bool + # FIXME: tilegx can implement a more efficient rwsem. 
config RWSEM_GENERIC_SPINLOCK def_bool y -# We have a very flat architecture from a migration point of view, -# so save boot time by presetting this (particularly useful on tile-sim). -config DEFAULT_MIGRATION_COST - int - default "10000000" - # We only support gcc 4.4 and above, so this should work. config ARCH_SUPPORTS_OPTIMIZED_INLINING def_bool y @@ -71,6 +86,12 @@ config ARCH_PHYS_ADDR_T_64BIT config ARCH_DMA_ADDR_T_64BIT def_bool y +config NEED_DMA_MAP_STATE + def_bool y + +config ARCH_HAS_DMA_SET_COHERENT_MASK + bool + config LOCKDEP_SUPPORT def_bool y @@ -96,35 +117,40 @@ config STRICT_DEVMEM config SMP def_bool y -# Allow checking for compile-time determined overflow errors in -# copy_from_user(). There are still unprovable places in the -# generic code as of 2.6.34, so this option is not really compatible -# with -Werror, which is more useful in general. -config DEBUG_COPY_FROM_USER - def_bool n - config HVC_TILE + depends on TTY select HVC_DRIVER + select HVC_IRQ if TILEGX def_bool y -# Please note: TILE-Gx support is not yet finalized; this is -# the preliminary support. TILE-Gx drivers are only provided -# with the alpha or beta test versions for Tilera customers. 
config TILEGX - depends on EXPERIMENTAL - bool "Building with TILE-Gx (64-bit) compiler and toolchain" + bool "Building for TILE-Gx (64-bit) processor" + select SPARSE_IRQ + select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_ARCH_KGDB + +config TILEPRO + def_bool !TILEGX config 64BIT - depends on TILEGX - def_bool y + def_bool TILEGX config ARCH_DEFCONFIG string - default "arch/tile/configs/tile_defconfig" if !TILEGX + default "arch/tile/configs/tilepro_defconfig" if !TILEGX default "arch/tile/configs/tilegx_defconfig" if TILEGX source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Tilera-specific configuration" config NR_CPUS @@ -137,7 +163,30 @@ config NR_CPUS smaller kernel memory footprint results from using a smaller value on chips with fewer tiles. -source "kernel/time/Kconfig" +if TILEGX + +choice + prompt "Kernel page size" + default PAGE_SIZE_64KB + help + This lets you select the page size of the kernel. For best + performance on memory-intensive applications, a page size of 64KB + is recommended. For workloads involving many small files, many + connections, etc., it may be better to select 16KB, which uses + memory more efficiently at some cost in TLB performance. + + Note that this option is TILE-Gx specific; currently + TILEPro page size is set by rebuilding the hypervisor. + +config PAGE_SIZE_16KB + bool "16KB" + +config PAGE_SIZE_64KB + bool "64KB" + +endchoice + +endif source "kernel/Kconfig.hz" @@ -164,7 +213,7 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -# We do not currently support disabling HIGHMEM on tile64 and tilepro. +# We do not currently support disabling HIGHMEM on tilepro. 
config HIGHMEM bool # "Support for more than 512 MB of RAM" default !TILEGX @@ -183,6 +232,22 @@ config HIGHMEM If unsure, say "true". +config ZONE_DMA + def_bool y + +config IOMMU_HELPER + bool + +config NEED_SG_DMA_LENGTH + bool + +config SWIOTLB + bool + default TILEGX + select IOMMU_HELPER + select NEED_SG_DMA_LENGTH + select ARCH_HAS_DMA_SET_COHERENT_MASK + # We do not currently support disabling NUMA. config NUMA bool # "NUMA Memory Allocation and Scheduler Support" @@ -242,6 +307,7 @@ endchoice config PAGE_OFFSET hex + depends on !64BIT default 0xF0000000 if VMSPLIT_3_75G default 0xE0000000 if VMSPLIT_3_5G default 0xB0000000 if VMSPLIT_2_75G @@ -253,6 +319,8 @@ config PAGE_OFFSET source "mm/Kconfig" +source "kernel/Kconfig.preempt" + config CMDLINE_BOOL bool "Built-in kernel command line" default n @@ -300,7 +368,7 @@ config CMDLINE_OVERRIDE config VMALLOC_RESERVE hex - default 0x1000000 + default 0x2000000 config HARDWALL bool "Hardwall support to allow access to user dynamic network" @@ -309,11 +377,19 @@ config HARDWALL config KERNEL_PL int "Processor protection level for kernel" range 1 2 - default "1" + default 2 if TILEGX + default 1 if !TILEGX ---help--- - This setting determines the processor protection level the - kernel will be built to run at. Generally you should use - the default value here. + Since MDE 4.2, the Tilera hypervisor runs the kernel + at PL2 by default. If running under an older hypervisor, + or as a KVM guest, you must run at PL1. (The current + hypervisor may also be recompiled with "make HV_PL=2" to + allow it to run a kernel at PL1, but clients running at PL1 + are not expected to be supported indefinitely.) + + If you're not sure, don't change the default. 
+ +source "arch/tile/gxio/Kconfig" endmenu # Tilera-specific configuration @@ -323,6 +399,9 @@ config PCI bool "PCI support" default y select PCI_DOMAINS + select GENERIC_PCI_IOMAP + select TILE_GXIO_TRIO if TILEGX + select PCI_MSI if TILEGX ---help--- Enable PCI root complex support, so PCIe endpoint devices can be attached to the Tile chip. Many, but not all, PCI devices @@ -334,22 +413,39 @@ config PCI_DOMAINS config NO_IOMEM def_bool !PCI -config NO_IOPORT +config NO_IOPORT_MAP def_bool !PCI +config TILE_PCI_IO + bool "PCI I/O space support" + default n + depends on PCI + depends on TILEGX + ---help--- + Enable PCI I/O space support on TILEGx. Since the PCI I/O space + is used by few modern PCIe endpoint devices, its support is disabled + by default to save the TRIO PIO Region resource for other purposes. + source "drivers/pci/Kconfig" +source "drivers/pci/pcie/Kconfig" + +config TILE_USB + tristate "Tilera USB host adapter support" + default y + depends on USB + depends on TILEGX + select TILE_GXIO_USB_HOST + ---help--- + Provides USB host adapter support for the built-in EHCI and OHCI + interfaces on TILE-Gx chips. + source "drivers/pci/hotplug/Kconfig" endmenu menu "Executable file formats" -# only elf supported -config KCORE_ELF - def_bool y - depends on PROC_FS - source "fs/Kconfig.binfmt" endmenu diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug index 9bc161a02c7..19734d3ab1e 100644 --- a/arch/tile/Kconfig.debug +++ b/arch/tile/Kconfig.debug @@ -14,30 +14,12 @@ config EARLY_PRINTK with klogd/syslogd. You should normally N here, unless you want to debug such a crash. -config DEBUG_STACKOVERFLOW - bool "Check for stack overflows" - depends on DEBUG_KERNEL +config TILE_HVGLUE_TRACE + bool "Provide wrapper functions for hypervisor ABI calls" + default n help - This option will cause messages to be printed if free stack space - drops below a certain limit. 
- -config DEBUG_STACK_USAGE - bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T and sysrq-P debug output. - - This option will slow down process creation somewhat. - -config DEBUG_EXTRA_FLAGS - string "Additional compiler arguments when building with '-g'" - depends on DEBUG_INFO - default "" - help - Debug info can be large, and flags like - `-femit-struct-debug-baseonly' can reduce the kernel file - size and build time noticeably. Such flags are often - helpful if the main use of debug info is line number info. + Provide wrapper functions for the hypervisor ABI calls + defined in arch/tile/kernel/hvglue.S. This allows tracing + mechanisms, etc., to have visibility into those calls. endmenu diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce70569..4dc380a519d 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -26,14 +26,20 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) endif endif -ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") -KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) -endif +# The tile compiler may emit .eh_frame information for backtracing. +# In kernel modules, this causes load failures due to unsupported relocations. +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) +LIBGCC_PATH := \ + $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) # Provide the path to use for "make defconfig". -KBUILD_DEFCONFIG := $(ARCH)_defconfig +# We default to the newer TILE-Gx architecture if only "tile" is given. +ifeq ($(ARCH),tile) + KBUILD_DEFCONFIG := tilegx_defconfig +else + KBUILD_DEFCONFIG := $(ARCH)_defconfig +endif # Used as a file extension when useful, e.g. head_$(BITS).o # Not needed for (e.g.) 
"$(CC) -m32" since the compiler automatically @@ -53,7 +59,7 @@ libs-y += $(LIBGCC_PATH) # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ -core-$(CONFIG_KVM) += arch/tile/kvm/ +core-$(CONFIG_TILE_GXIO) += arch/tile/gxio/ ifdef TILERA_ROOT INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot diff --git a/arch/tile/configs/tile_defconfig b/arch/tile/configs/tile_defconfig deleted file mode 100644 index 0fe54445fda..00000000000 --- a/arch/tile/configs/tile_defconfig +++ /dev/null @@ -1,71 +0,0 @@ -CONFIG_EXPERIMENTAL=y -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="usr/contents.txt" -CONFIG_EXPERT=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_HZ_100=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -CONFIG_IPV6=y -# CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_NETDEVICES=y -CONFIG_TUN=y -# CONFIG_NETDEV_10000 is not set -# CONFIG_WLAN is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -# CONFIG_HW_RANDOM is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -# CONFIG_HID_SUPPORT is not set -CONFIG_RTC_CLASS=y -# CONFIG_RTC_INTF_SYSFS is not set -# CONFIG_RTC_INTF_PROC is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_FUSE_FS=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=m -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y 
-CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ISO8859_1=y -CONFIG_FRAME_WARN=2048 -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_VM=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_EXTRA_FLAGS="-femit-struct-debug-baseonly" diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig new file mode 100644 index 00000000000..730e40d9cf6 --- /dev/null +++ b/arch/tile/configs/tilegx_defconfig @@ -0,0 +1,420 @@ +CONFIG_TILEGX=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=19 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_RD_XZ=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_NR_CPUS=100 +CONFIG_HZ_100=y +# CONFIG_COMPACTION is not set +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_TILE_PCI_IO=y +CONFIG_PCI_DEBUG=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_SUB_POLICY=y 
+CONFIG_XFRM_STATISTICS=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_NETLABEL=y +CONFIG_RDS=m +CONFIG_RDS_TCP=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_PHONET=m +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_CMP=m 
+CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_CLS_IND=y +CONFIG_DCB=y +CONFIG_DNS_RESOLVER=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_ATA_OVER_ETH=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SAS_ATA=y +CONFIG_ISCSI_TCP=m +CONFIG_SCSI_MVSAS=y +# CONFIG_SCSI_MVSAS_DEBUG is not set +CONFIG_SCSI_MVSAS_TASKLET=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SIL24=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_TARGET_CORE=m +CONFIG_TCM_IBLOCK=m +CONFIG_TCM_FILEIO=m +CONFIG_TCM_PSCSI=m +CONFIG_LOOPBACK_TARGET=m +CONFIG_ISCSI_TARGET=m +CONFIG_FUSION=y +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL_TRAP=y +CONFIG_TUN=y +CONFIG_VETH=m +CONFIG_NET_DSA_MV88E6060=y +CONFIG_NET_DSA_MV88E6131=y +CONFIG_NET_DSA_MV88E6123_61_65=y +CONFIG_SKY2=y +CONFIG_PTP_1588_CLOCK_TILEGX=y +# CONFIG_WLAN is not set +# 
CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_TILEGX=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=m +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +# CONFIG_VGA_ARB is not set +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_MGA=m +CONFIG_DRM_VIA=m +CONFIG_DRM_SAVAGE=m +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TILE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_V4_1=y +CONFIG_NFS_FSCACHE=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_FSCACHE=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y 
+CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_DLM_DEBUG=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_HEADERS_CHECK=y +# CONFIG_FRAME_POINTER is not set +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ASYNC_RAID6_TEST=m +CONFIG_KGDB=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m 
+CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig new file mode 100644 index 00000000000..80fc32ed049 --- /dev/null +++ b/arch/tile/configs/tilepro_defconfig @@ -0,0 +1,534 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=19 +CONFIG_CGROUPS=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_BLK_CGROUP=y +CONFIG_NAMESPACES=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_RD_XZ=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_CFQ_GROUP_IOSCHED=y +CONFIG_HZ_100=y +# CONFIG_COMPACTION is not set +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_PCI_DEBUG=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_NET=y 
+CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_STATISTICS=y +CONFIG_NET_KEY=m +CONFIG_NET_KEY_MIGRATE=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=m +CONFIG_NF_CT_PROTO_UDPLITE=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_MARK=m 
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_IP_NF_IPTABLES=y 
+CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_BRIDGE_EBT_ULOG=m +CONFIG_BRIDGE_EBT_NFLOG=m +CONFIG_RDS=m +CONFIG_RDS_TCP=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_PHONET=m +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m 
+CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_CLS_IND=y +CONFIG_DCB=y +CONFIG_DNS_RESOLVER=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_ATA_OVER_ETH=m +CONFIG_RAID_ATTRS=m +CONFIG_SCSI_TGT=m +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_ATA=y +CONFIG_SATA_SIL24=y +# CONFIG_ATA_SFF is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_DEBUG=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m +CONFIG_DM_UEVENT=y +CONFIG_FUSION=y +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETPOLL_TRAP=y +CONFIG_TUN=y +CONFIG_VETH=m +CONFIG_NET_DSA_MV88E6060=y +CONFIG_NET_DSA_MV88E6131=y +CONFIG_NET_DSA_MV88E6123_61_65=y +# CONFIG_NET_VENDOR_3COM is not set +CONFIG_E1000E=y +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# 
CONFIG_LEGACY_PTYS is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=m +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +# CONFIG_VGA_ARB is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EDAC=y +CONFIG_EDAC_MM_EDAC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TILE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_BTRFS_FS=m +CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_V4_1=y +CONFIG_NFS_FSCACHE=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_FSCACHE=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m 
+CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_DLM_DEBUG=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_FRAME_WARN=2048 +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_HEADERS_CHECK=y +# CONFIG_FRAME_POINTER is not set +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ASYNC_RAID6_TEST=m +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_CCM=m +CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m 
+CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRC_CCITT=m +CONFIG_CRC7=m diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig new file mode 100644 index 00000000000..d4e10d58071 --- /dev/null +++ b/arch/tile/gxio/Kconfig @@ -0,0 +1,33 @@ +# Support direct access to TILE-Gx hardware from user space, via the +# gxio library, or from kernel space, via kernel IORPC support. +config TILE_GXIO + bool + depends on TILEGX + +# Support direct access to the common I/O DMA facility within the +# TILE-Gx mPIPE and Trio hardware from kernel space. +config TILE_GXIO_DMA + bool + select TILE_GXIO + +# Support direct access to the TILE-Gx mPIPE hardware from kernel space. +config TILE_GXIO_MPIPE + bool + select TILE_GXIO + select TILE_GXIO_DMA + +# Support direct access to the TILE-Gx TRIO hardware from kernel space. +config TILE_GXIO_TRIO + bool + select TILE_GXIO + select TILE_GXIO_DMA + +# Support direct access to the TILE-Gx USB hardware from kernel space. +config TILE_GXIO_USB_HOST + bool + select TILE_GXIO + +# Support direct access to the TILE-Gx UART hardware from kernel space. +config TILE_GXIO_UART + bool + select TILE_GXIO diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile new file mode 100644 index 00000000000..26ae2c72746 --- /dev/null +++ b/arch/tile/gxio/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for the Tile-Gx device access support. 
+# + +obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o +obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o +obj-$(CONFIG_TILE_GXIO_MPIPE) += mpipe.o iorpc_mpipe.o iorpc_mpipe_info.o +obj-$(CONFIG_TILE_GXIO_TRIO) += trio.o iorpc_trio.o +obj-$(CONFIG_TILE_GXIO_UART) += uart.o iorpc_uart.o +obj-$(CONFIG_TILE_GXIO_USB_HOST) += usb_host.o iorpc_usb_host.o diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c new file mode 100644 index 00000000000..baa60357f8b --- /dev/null +++ b/arch/tile/gxio/dma_queue.c @@ -0,0 +1,176 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/io.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <gxio/dma_queue.h> + +/* + * Wait for a memory read to complete. The no-op move names val as an + * input operand, so it presumably cannot issue until the load that + * produces val has returned (NOTE(review): confirm against the ISA). + */ +#define wait_for_value(val) \ + __asm__ __volatile__("move %0, %0" :: "r"(val)) + +/* The index is in the low 16 bits. */ +#define DMA_QUEUE_INDEX_MASK ((1 << 16) - 1) + +/* + * The hardware descriptor-ring type. + * This matches the types used by mpipe (MPIPE_EDMA_POST_REGION_VAL_t) + * and trio (TRIO_PUSH_DMA_REGION_VAL_t or TRIO_PULL_DMA_REGION_VAL_t). + * See those types for more documentation on the individual fields. 
+ */ +typedef union { + struct { +#ifndef __BIG_ENDIAN__ + uint64_t ring_idx:16; + uint64_t count:16; + uint64_t gen:1; + uint64_t __reserved:31; +#else + uint64_t __reserved:31; + uint64_t gen:1; + uint64_t count:16; + uint64_t ring_idx:16; +#endif + }; + uint64_t word; +} __gxio_ring_t; +/* + * Zero *dma_queue, record post_region_addr, and seed + * credits_and_next_index with min(num_entries, 65535) credits placed at + * DMA_QUEUE_CREDIT_SHIFT. + */ +void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, + void *post_region_addr, unsigned int num_entries) +{ + /* + * Limit 65536 entry rings to 65535 credits because we only have a + * 16 bit completion counter. + */ + int64_t credits = (num_entries < 65536) ? num_entries : 65535; + + memset(dma_queue, 0, sizeof(*dma_queue)); + + dma_queue->post_region_addr = post_region_addr; + dma_queue->hw_complete_count = 0; + dma_queue->credits_and_next_index = credits << DMA_QUEUE_CREDIT_SHIFT; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_init); +/* + * Refresh the credit count from hardware. Reads the 16-bit completion + * count from the post region, works out how many completions (delta) + * happened since hw_complete_count was last advanced, and, if this + * caller wins the cmpxchg on hw_complete_count, adds delta credits to + * credits_and_next_index. Returns with nothing changed when delta is + * zero or when another updater raced us. + */ +void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue) +{ + __gxio_ring_t val; + uint64_t count; + uint64_t delta; + uint64_t new_count; + + /* + * Read the 64-bit completion count without touching the cache, so + * we later avoid having to evict any sharers of this cache line + * when we update it below. The cmpxchg of -1 with -1 can never + * change the stored value; only its return value is used. + */ + uint64_t orig_hw_complete_count = + cmpxchg(&dma_queue->hw_complete_count, + -1, -1); + + /* Make sure the load completes before we access the hardware. */ + wait_for_value(orig_hw_complete_count); + + /* Read the 16-bit count of how many packets it has completed. */ + val.word = __gxio_mmio_read(dma_queue->post_region_addr); + count = val.count; + + /* + * Calculate the number of completions since we last updated the + * 64-bit counter. It's safe to ignore the high bits because the + * maximum credit value is 65535. + */ + delta = (count - orig_hw_complete_count) & 0xffff; + if (delta == 0) + return; + + /* + * Try to write back the count, advanced by delta. If we race with + * another thread, this might fail, in which case we return + * immediately on the assumption that some credits are (or at least + * were) available. 
+ */ + new_count = orig_hw_complete_count + delta; + if (cmpxchg(&dma_queue->hw_complete_count, + orig_hw_complete_count, + new_count) != orig_hw_complete_count) + return; + + /* + * We succeeded in advancing the completion count; add back the + * corresponding number of egress credits. + */ + __insn_fetchadd(&dma_queue->credits_and_next_index, + (delta << DMA_QUEUE_CREDIT_SHIFT)); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_update_credits); + +/* + * A separate 'blocked' method for put() so that backtraces and + * profiles will clearly indicate that we're wasting time spinning on + * egress availability rather than actually posting commands. + * + * Spins, with the backoff doubling from 16 up to 256 passes, until + * old + modifier is non-negative (presumably meaning that + * __insn_fetchaddgez() performed the add), then returns the value + * credits_and_next_index held before the add. + */ +int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, + int64_t modifier) +{ + int backoff = 16; + int64_t old; + + do { + int i; + /* Back off to avoid spamming memory networks. */ + for (i = backoff; i > 0; i--) + __insn_mfspr(SPR_PASS); + + /* Check credits again. */ + __gxio_dma_queue_update_credits(dma_queue); + old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + + /* Calculate bounded exponential backoff for next iteration. 
*/ + if (backoff < 256) + backoff *= 2; + } while (old + modifier < 0); + + return old; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_wait_for_credits); +/* + * Exported wrapper around __gxio_dma_queue_reserve(): 'wait' is reduced + * to a boolean and the final argument is fixed to true. + */ +int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, + unsigned int num, int wait) +{ + return __gxio_dma_queue_reserve(dma_queue, num, wait != 0, true); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_reserve_aux); +/* + * Return nonzero once hw_complete_count has passed completion_slot. + * When 'update' is set and the cached count has not yet passed the + * slot, the count is first refreshed from hardware through + * __gxio_dma_queue_update_credits() and then checked again. + */ +int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, + int64_t completion_slot, int update) +{ + if (update) { + if (ACCESS_ONCE(dma_queue->hw_complete_count) > + completion_slot) + return 1; + + __gxio_dma_queue_update_credits(dma_queue); + } + + return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/gxio/iorpc_globals.c b/arch/tile/gxio/iorpc_globals.c new file mode 100644 index 00000000000..e178e90805a --- /dev/null +++ b/arch/tile/gxio/iorpc_globals.c @@ -0,0 +1,89 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#include "gxio/iorpc_globals.h" + +struct arm_pollfd_param { + union iorpc_pollfd pollfd; +}; +/* + * Pack pollfd_cookie into an on-stack arm_pollfd_param and pass it to + * hv_dev_pwrite() on fd with op code IORPC_OP_ARM_POLLFD; the + * hypervisor call's return value is returned unchanged. + */ +int __iorpc_arm_pollfd(int fd, int pollfd_cookie) +{ + struct arm_pollfd_param temp; + struct arm_pollfd_param *params = &temp; + + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_ARM_POLLFD); +} + +EXPORT_SYMBOL(__iorpc_arm_pollfd); + +struct close_pollfd_param { + union iorpc_pollfd pollfd; +}; +/* + * Same marshalling as __iorpc_arm_pollfd(), issued with op code + * IORPC_OP_CLOSE_POLLFD. + */ +int __iorpc_close_pollfd(int fd, int pollfd_cookie) +{ + struct close_pollfd_param temp; + struct close_pollfd_param *params = &temp; + + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_CLOSE_POLLFD); +} + +EXPORT_SYMBOL(__iorpc_close_pollfd); + +struct get_mmio_base_param { + HV_PTE base; +}; +/* + * Read a get_mmio_base_param from fd with hv_dev_pread() (op code + * IORPC_OP_GET_MMIO_BASE), store its base field in *base and return + * the call's result. NOTE(review): *base is copied from the on-stack + * buffer without looking at the result first, so callers should + * presumably check the return value before trusting *base. + */ +int __iorpc_get_mmio_base(int fd, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(__iorpc_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; +/* + * Send offset and size to fd through hv_dev_pwrite() with op code + * IORPC_OP_CHECK_MMIO_OFFSET and return the call's result. + */ +int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(__iorpc_check_mmio_offset); diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c new file mode 100644 index 00000000000..e19325c4c43 --- /dev/null +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -0,0 +1,593 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#include "gxio/iorpc_mpipe.h" +/* + * Every stub in this file follows one pattern: the arguments are copied + * into an on-stack parameter struct, and that struct is handed to + * hv_dev_pwrite() (or hv_dev_pread() for calls that return data) on + * context->fd, with the matching GXIO_MPIPE_OP_* code as the final + * argument. The hypervisor call's return value is returned unchanged. + * Only deviations from that pattern are commented below. + */ +struct alloc_buffer_stacks_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_buffer_stacks_param temp; + struct alloc_buffer_stacks_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_buffer_stacks); + +struct init_buffer_stack_aux_param { + union iorpc_mem_buffer buffer; + unsigned int stack; + unsigned int buffer_size_enum; +}; +/* + * Translates mem_va with va_to_cpa_and_pte() first and returns that + * helper's nonzero result without contacting the hypervisor; on + * success the cpa/pte pair, mem_size and mem_flags are stored in + * params->buffer.kernel before the write. + */ +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, + void *mem_va, size_t mem_size, + unsigned int mem_flags, unsigned int stack, + unsigned int buffer_size_enum) +{ + int __result; + unsigned long long __cpa; + pte_t __pte; + struct init_buffer_stack_aux_param temp; + struct init_buffer_stack_aux_param *params = &temp; + + __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte); + if (__result != 0) + return __result; + params->buffer.kernel.cpa = __cpa; + params->buffer.kernel.size = mem_size; + params->buffer.kernel.pte = __pte; + params->buffer.kernel.flags = mem_flags; + params->stack = stack; + params->buffer_size_enum = buffer_size_enum; + + return 
hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_init_buffer_stack_aux); + + +struct alloc_notif_rings_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_notif_rings_param temp; + struct alloc_notif_rings_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_ALLOC_NOTIF_RINGS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_notif_rings); + +struct init_notif_ring_aux_param { + union iorpc_mem_buffer buffer; + unsigned int ring; +}; +/* Translates mem_va first; see gxio_mpipe_init_buffer_stack_aux(). */ +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring) +{ + int __result; + unsigned long long __cpa; + pte_t __pte; + struct init_notif_ring_aux_param temp; + struct init_notif_ring_aux_param *params = &temp; + + __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte); + if (__result != 0) + return __result; + params->buffer.kernel.cpa = __cpa; + params->buffer.kernel.size = mem_size; + params->buffer.kernel.pte = __pte; + params->buffer.kernel.flags = mem_flags; + params->ring = ring; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_INIT_NOTIF_RING_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_init_notif_ring_aux); + +struct request_notif_ring_interrupt_param { + union iorpc_interrupt interrupt; + unsigned int ring; +}; + +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, + int inter_x, int inter_y, + int inter_ipi, int inter_event, + unsigned int ring) +{ + struct request_notif_ring_interrupt_param temp; + struct request_notif_ring_interrupt_param *params = &temp; + + 
params->interrupt.kernel.x = inter_x; + params->interrupt.kernel.y = inter_y; + params->interrupt.kernel.ipi = inter_ipi; + params->interrupt.kernel.event = inter_event; + params->ring = ring; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_REQUEST_NOTIF_RING_INTERRUPT); +} + +EXPORT_SYMBOL(gxio_mpipe_request_notif_ring_interrupt); + +struct enable_notif_ring_interrupt_param { + unsigned int ring; +}; + +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, + unsigned int ring) +{ + struct enable_notif_ring_interrupt_param temp; + struct enable_notif_ring_interrupt_param *params = &temp; + + params->ring = ring; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ENABLE_NOTIF_RING_INTERRUPT); +} + +EXPORT_SYMBOL(gxio_mpipe_enable_notif_ring_interrupt); + +struct alloc_notif_groups_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_notif_groups_param temp; + struct alloc_notif_groups_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_ALLOC_NOTIF_GROUPS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_notif_groups); + +struct init_notif_group_param { + unsigned int group; + gxio_mpipe_notif_group_bits_t bits; +}; + +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, + unsigned int group, + gxio_mpipe_notif_group_bits_t bits) +{ + struct init_notif_group_param temp; + struct init_notif_group_param *params = &temp; + + params->group = group; + params->bits = bits; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_INIT_NOTIF_GROUP); +} + +EXPORT_SYMBOL(gxio_mpipe_init_notif_group); + +struct 
alloc_buckets_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, + unsigned int first, unsigned int flags) +{ + struct alloc_buckets_param temp; + struct alloc_buckets_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_ALLOC_BUCKETS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_buckets); + +struct init_bucket_param { + unsigned int bucket; + MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info; +}; + +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, + MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info) +{ + struct init_bucket_param temp; + struct init_bucket_param *params = &temp; + + params->bucket = bucket; + params->bucket_info = bucket_info; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_INIT_BUCKET); +} + +EXPORT_SYMBOL(gxio_mpipe_init_bucket); + +struct alloc_edma_rings_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_edma_rings_param temp; + struct alloc_edma_rings_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_ALLOC_EDMA_RINGS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_edma_rings); + +struct init_edma_ring_aux_param { + union iorpc_mem_buffer buffer; + unsigned int ring; + unsigned int channel; +}; +/* Translates mem_va first; see gxio_mpipe_init_buffer_stack_aux(). */ +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring, unsigned int channel) +{ + int __result; + unsigned long long __cpa; + pte_t __pte; + struct 
init_edma_ring_aux_param temp; + struct init_edma_ring_aux_param *params = &temp; + + __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte); + if (__result != 0) + return __result; + params->buffer.kernel.cpa = __cpa; + params->buffer.kernel.size = mem_size; + params->buffer.kernel.pte = __pte; + params->buffer.kernel.flags = mem_flags; + params->ring = ring; + params->channel = channel; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_INIT_EDMA_RING_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux); + +/* + * No parameter struct here: the caller's blob is handed to + * hv_dev_pwrite() as-is, with blob_size as the length. + */ +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, + size_t blob_size) +{ + const void *params = blob; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, blob_size, + GXIO_MPIPE_OP_COMMIT_RULES); +} + +EXPORT_SYMBOL(gxio_mpipe_commit_rules); + +struct register_client_memory_param { + unsigned int iotlb; + HV_PTE pte; + unsigned int flags; +}; + +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, + unsigned int iotlb, HV_PTE pte, + unsigned int flags) +{ + struct register_client_memory_param temp; + struct register_client_memory_param *params = &temp; + + params->iotlb = iotlb; + params->pte = pte; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY); +} + +EXPORT_SYMBOL(gxio_mpipe_register_client_memory); + +struct link_open_aux_param { + _gxio_mpipe_link_name_t name; + unsigned int flags; +}; + +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, + _gxio_mpipe_link_name_t name, unsigned int flags) +{ + struct link_open_aux_param temp; + struct link_open_aux_param *params = &temp; + + params->name = name; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_LINK_OPEN_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_link_open_aux); + +struct link_close_aux_param { + int mac; +}; + +int 
gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac) +{ + struct link_close_aux_param temp; + struct link_close_aux_param *params = &temp; + + params->mac = mac; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_LINK_CLOSE_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_link_close_aux); + +struct link_set_attr_aux_param { + int mac; + uint32_t attr; + int64_t val; +}; + +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, + uint32_t attr, int64_t val) +{ + struct link_set_attr_aux_param temp; + struct link_set_attr_aux_param *params = &temp; + + params->mac = mac; + params->attr = attr; + params->val = val; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_LINK_SET_ATTR_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_link_set_attr_aux); + +struct get_timestamp_aux_param { + uint64_t sec; + uint64_t nsec; + uint64_t cycles; +}; +/* + * Read variant: hv_dev_pread() fills the on-stack struct, whose fields + * are then copied to *sec, *nsec and *cycles. NOTE(review): the copy + * is unconditional, so the outputs are presumably meaningful only when + * the returned result indicates success. + */ +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles) +{ + int __result; + struct get_timestamp_aux_param temp; + struct get_timestamp_aux_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_MPIPE_OP_GET_TIMESTAMP_AUX); + *sec = params->sec; + *nsec = params->nsec; + *cycles = params->cycles; + + return __result; +} + +EXPORT_SYMBOL(gxio_mpipe_get_timestamp_aux); + +struct set_timestamp_aux_param { + uint64_t sec; + uint64_t nsec; + uint64_t cycles; +}; + +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, + uint64_t nsec, uint64_t cycles) +{ + struct set_timestamp_aux_param temp; + struct set_timestamp_aux_param *params = &temp; + + params->sec = sec; + params->nsec = nsec; + params->cycles = cycles; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_SET_TIMESTAMP_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_set_timestamp_aux); + +struct 
adjust_timestamp_aux_param { + int64_t nsec; +}; + +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec) +{ + struct adjust_timestamp_aux_param temp; + struct adjust_timestamp_aux_param *params = &temp; + + params->nsec = nsec; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux); + +struct config_edma_ring_blks_param { + unsigned int ering; + unsigned int max_blks; + unsigned int min_snf_blks; + unsigned int db; +}; + +int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, + unsigned int ering, unsigned int max_blks, + unsigned int min_snf_blks, unsigned int db) +{ + struct config_edma_ring_blks_param temp; + struct config_edma_ring_blks_param *params = &temp; + + params->ering = ering; + params->max_blks = max_blks; + params->min_snf_blks = min_snf_blks; + params->db = db; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS); +} + +EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks); + +struct adjust_timestamp_freq_param { + int32_t ppb; +}; + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb) +{ + struct adjust_timestamp_freq_param temp; + struct adjust_timestamp_freq_param *params = &temp; + + params->ppb = ppb; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ); +} + +EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq); + +struct arm_pollfd_param { + union iorpc_pollfd pollfd; +}; + +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) +{ + struct arm_pollfd_param temp; + struct arm_pollfd_param *params = &temp; + + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_ARM_POLLFD); +} + +EXPORT_SYMBOL(gxio_mpipe_arm_pollfd); + +struct 
close_pollfd_param { + union iorpc_pollfd pollfd; +}; + +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie) +{ + struct close_pollfd_param temp; + struct close_pollfd_param *params = &temp; + + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_CLOSE_POLLFD); +} + +EXPORT_SYMBOL(gxio_mpipe_close_pollfd); + +struct get_mmio_base_param { + HV_PTE base; +}; +/* + * Read variant; *base is copied out unconditionally, as in + * gxio_mpipe_get_timestamp_aux(). + */ +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_MPIPE_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(gxio_mpipe_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; + +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, + unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(gxio_mpipe_check_mmio_offset); diff --git a/arch/tile/gxio/iorpc_mpipe_info.c b/arch/tile/gxio/iorpc_mpipe_info.c new file mode 100644 index 00000000000..77019c6e9b4 --- /dev/null +++ b/arch/tile/gxio/iorpc_mpipe_info.c @@ -0,0 +1,102 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#include "gxio/iorpc_mpipe_info.h" +/* + * Each stub below copies its arguments into an on-stack parameter + * struct and passes it to hv_dev_pwrite() (or hv_dev_pread() for calls + * that return data) on context->fd, with the matching + * GXIO_MPIPE_INFO_OP_* code as the final argument; the hypervisor + * call's return value is returned unchanged. + */ +struct instance_aux_param { + _gxio_mpipe_link_name_t name; +}; + +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, + _gxio_mpipe_link_name_t name) +{ + struct instance_aux_param temp; + struct instance_aux_param *params = &temp; + + params->name = name; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_MPIPE_INFO_OP_INSTANCE_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_info_instance_aux); + +struct enumerate_aux_param { + _gxio_mpipe_link_name_t name; + _gxio_mpipe_link_mac_t mac; +}; +/* + * Read variant. idx is not part of the parameter struct: it is placed + * in the upper 32 bits of the final hv_dev_pread() argument, OR-ed with + * the op code. NOTE(review): *name and *mac are copied from the + * on-stack buffer unconditionally, so they are presumably meaningful + * only when the returned result indicates success. + */ +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, + unsigned int idx, + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac) +{ + int __result; + struct enumerate_aux_param temp; + struct enumerate_aux_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + (((uint64_t)idx << 32) | + GXIO_MPIPE_INFO_OP_ENUMERATE_AUX)); + *name = params->name; + *mac = params->mac; + + return __result; +} + +EXPORT_SYMBOL(gxio_mpipe_info_enumerate_aux); + +struct get_mmio_base_param { + HV_PTE base; +}; +/* + * Read variant; *base is copied out unconditionally, as in + * gxio_mpipe_info_enumerate_aux(). + */ +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, + HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_MPIPE_INFO_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(gxio_mpipe_info_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; + 
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, + unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(gxio_mpipe_info_check_mmio_offset); diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c new file mode 100644 index 00000000000..1d3cedb9aeb --- /dev/null +++ b/arch/tile/gxio/iorpc_trio.c @@ -0,0 +1,350 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#include "gxio/iorpc_trio.h" + +struct alloc_asids_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, + unsigned int first, unsigned int flags) +{ + struct alloc_asids_param temp; + struct alloc_asids_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_ALLOC_ASIDS); +} + +EXPORT_SYMBOL(gxio_trio_alloc_asids); + + +struct alloc_memory_maps_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_memory_maps_param temp; + struct alloc_memory_maps_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_ALLOC_MEMORY_MAPS); +} + +EXPORT_SYMBOL(gxio_trio_alloc_memory_maps); + +struct alloc_scatter_queues_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_scatter_queues_param temp; + struct alloc_scatter_queues_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES); +} + +EXPORT_SYMBOL(gxio_trio_alloc_scatter_queues); + +struct alloc_pio_regions_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_pio_regions_param 
temp; + struct alloc_pio_regions_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_ALLOC_PIO_REGIONS); +} + +EXPORT_SYMBOL(gxio_trio_alloc_pio_regions); + +struct init_pio_region_aux_param { + unsigned int pio_region; + unsigned int mac; + uint32_t bus_address_hi; + unsigned int flags; +}; + +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, + unsigned int pio_region, unsigned int mac, + uint32_t bus_address_hi, unsigned int flags) +{ + struct init_pio_region_aux_param temp; + struct init_pio_region_aux_param *params = &temp; + + params->pio_region = pio_region; + params->mac = mac; + params->bus_address_hi = bus_address_hi; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_INIT_PIO_REGION_AUX); +} + +EXPORT_SYMBOL(gxio_trio_init_pio_region_aux); + + +struct init_memory_map_mmu_aux_param { + unsigned int map; + unsigned long va; + uint64_t size; + unsigned int asid; + unsigned int mac; + uint64_t bus_address; + unsigned int node; + unsigned int order_mode; +}; + +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, + unsigned int map, unsigned long va, + uint64_t size, unsigned int asid, + unsigned int mac, uint64_t bus_address, + unsigned int node, + unsigned int order_mode) +{ + struct init_memory_map_mmu_aux_param temp; + struct init_memory_map_mmu_aux_param *params = &temp; + + params->map = map; + params->va = va; + params->size = size; + params->asid = asid; + params->mac = mac; + params->bus_address = bus_address; + params->node = node; + params->order_mode = order_mode; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX); +} + +EXPORT_SYMBOL(gxio_trio_init_memory_map_mmu_aux); + +struct get_port_property_param { + struct pcie_trio_ports_property 
trio_ports; +}; + +int gxio_trio_get_port_property(gxio_trio_context_t *context, + struct pcie_trio_ports_property *trio_ports) +{ + int __result; + struct get_port_property_param temp; + struct get_port_property_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_TRIO_OP_GET_PORT_PROPERTY); + *trio_ports = params->trio_ports; + + return __result; +} + +EXPORT_SYMBOL(gxio_trio_get_port_property); + +struct config_legacy_intr_param { + union iorpc_interrupt interrupt; + unsigned int mac; + unsigned int intx; +}; + +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int intx) +{ + struct config_legacy_intr_param temp; + struct config_legacy_intr_param *params = &temp; + + params->interrupt.kernel.x = inter_x; + params->interrupt.kernel.y = inter_y; + params->interrupt.kernel.ipi = inter_ipi; + params->interrupt.kernel.event = inter_event; + params->mac = mac; + params->intx = intx; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_CONFIG_LEGACY_INTR); +} + +EXPORT_SYMBOL(gxio_trio_config_legacy_intr); + +struct config_msi_intr_param { + union iorpc_interrupt interrupt; + unsigned int mac; + unsigned int mem_map; + uint64_t mem_map_base; + uint64_t mem_map_limit; + unsigned int asid; +}; + +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int mem_map, + uint64_t mem_map_base, uint64_t mem_map_limit, + unsigned int asid) +{ + struct config_msi_intr_param temp; + struct config_msi_intr_param *params = &temp; + + params->interrupt.kernel.x = inter_x; + params->interrupt.kernel.y = inter_y; + params->interrupt.kernel.ipi = inter_ipi; + params->interrupt.kernel.event = inter_event; + params->mac = mac; + params->mem_map = mem_map; + params->mem_map_base = mem_map_base; 
+ params->mem_map_limit = mem_map_limit; + params->asid = asid; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_CONFIG_MSI_INTR); +} + +EXPORT_SYMBOL(gxio_trio_config_msi_intr); + + +struct set_mps_mrs_param { + uint16_t mps; + uint16_t mrs; + unsigned int mac; +}; + +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, + uint16_t mrs, unsigned int mac) +{ + struct set_mps_mrs_param temp; + struct set_mps_mrs_param *params = &temp; + + params->mps = mps; + params->mrs = mrs; + params->mac = mac; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_SET_MPS_MRS); +} + +EXPORT_SYMBOL(gxio_trio_set_mps_mrs); + +struct force_rc_link_up_param { + unsigned int mac; +}; + +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac) +{ + struct force_rc_link_up_param temp; + struct force_rc_link_up_param *params = &temp; + + params->mac = mac; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_FORCE_RC_LINK_UP); +} + +EXPORT_SYMBOL(gxio_trio_force_rc_link_up); + +struct force_ep_link_up_param { + unsigned int mac; +}; + +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac) +{ + struct force_ep_link_up_param temp; + struct force_ep_link_up_param *params = &temp; + + params->mac = mac; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_FORCE_EP_LINK_UP); +} + +EXPORT_SYMBOL(gxio_trio_force_ep_link_up); + +struct get_mmio_base_param { + HV_PTE base; +}; + +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_TRIO_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(gxio_trio_get_mmio_base); + +struct 
check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; + +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, + unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_TRIO_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(gxio_trio_check_mmio_offset); diff --git a/arch/tile/gxio/iorpc_uart.c b/arch/tile/gxio/iorpc_uart.c new file mode 100644 index 00000000000..b9a6d6193d7 --- /dev/null +++ b/arch/tile/gxio/iorpc_uart.c @@ -0,0 +1,77 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#include "gxio/iorpc_uart.h" + +struct cfg_interrupt_param { + union iorpc_interrupt interrupt; +}; + +int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event) +{ + struct cfg_interrupt_param temp; + struct cfg_interrupt_param *params = &temp; + + params->interrupt.kernel.x = inter_x; + params->interrupt.kernel.y = inter_y; + params->interrupt.kernel.ipi = inter_ipi; + params->interrupt.kernel.event = inter_event; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_UART_OP_CFG_INTERRUPT); +} + +EXPORT_SYMBOL(gxio_uart_cfg_interrupt); + +struct get_mmio_base_param { + HV_PTE base; +}; + +int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_UART_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(gxio_uart_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; + +int gxio_uart_check_mmio_offset(gxio_uart_context_t *context, + unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_UART_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(gxio_uart_check_mmio_offset); diff --git a/arch/tile/gxio/iorpc_usb_host.c b/arch/tile/gxio/iorpc_usb_host.c new file mode 100644 index 00000000000..9c820073bfc --- /dev/null +++ b/arch/tile/gxio/iorpc_usb_host.c @@ -0,0 +1,99 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#include "gxio/iorpc_usb_host.h" + +struct cfg_interrupt_param { + union iorpc_interrupt interrupt; +}; + +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event) +{ + struct cfg_interrupt_param temp; + struct cfg_interrupt_param *params = &temp; + + params->interrupt.kernel.x = inter_x; + params->interrupt.kernel.y = inter_y; + params->interrupt.kernel.ipi = inter_ipi; + params->interrupt.kernel.event = inter_event; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), GXIO_USB_HOST_OP_CFG_INTERRUPT); +} + +EXPORT_SYMBOL(gxio_usb_host_cfg_interrupt); + +struct register_client_memory_param { + HV_PTE pte; + unsigned int flags; +}; + +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, + HV_PTE pte, unsigned int flags) +{ + struct register_client_memory_param temp; + struct register_client_memory_param *params = &temp; + + params->pte = pte; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_USB_HOST_OP_REGISTER_CLIENT_MEMORY); +} + +EXPORT_SYMBOL(gxio_usb_host_register_client_memory); + +struct get_mmio_base_param { + HV_PTE base; +}; + +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + 
hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params), + GXIO_USB_HOST_OP_GET_MMIO_BASE); + *base = params->base; + + return __result; +} + +EXPORT_SYMBOL(gxio_usb_host_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; + +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, + unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(gxio_usb_host_check_mmio_offset); diff --git a/arch/tile/gxio/kiorpc.c b/arch/tile/gxio/kiorpc.c new file mode 100644 index 00000000000..c8096aa5a3f --- /dev/null +++ b/arch/tile/gxio/kiorpc.c @@ -0,0 +1,61 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx IORPC support for kernel I/O drivers. + */ + +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/io.h> +#include <gxio/iorpc_globals.h> +#include <gxio/kiorpc.h> + +#ifdef DEBUG_IORPC +#define TRACE(FMT, ...) pr_info(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__) +#else +#define TRACE(...) +#endif + +/* Create kernel-VA-space MMIO mapping for an on-chip IO device. 
*/ +void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset, + unsigned long size) +{ + pgprot_t mmio_base, prot = { 0 }; + unsigned long pfn; + int err; + + /* Look up the shim's lotar and base PA. */ + err = __iorpc_get_mmio_base(hv_fd, &mmio_base); + if (err) { + TRACE("get_mmio_base() failure: %d\n", err); + return NULL; + } + + /* Make sure the HV driver approves of our offset and size. */ + err = __iorpc_check_mmio_offset(hv_fd, offset, size); + if (err) { + TRACE("check_mmio_offset() failure: %d\n", err); + return NULL; + } + + /* + * mmio_base contains a base pfn and homing coordinates. Turn + * it into an MMIO pgprot and offset pfn. + */ + prot = hv_pte_set_lotar(prot, hv_pte_get_lotar(mmio_base)); + pfn = pte_pfn(mmio_base) + PFN_DOWN(offset); + + return ioremap_prot(PFN_PHYS(pfn), size, prot); +} + +EXPORT_SYMBOL(iorpc_ioremap); diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c new file mode 100644 index 00000000000..5301a9ffbae --- /dev/null +++ b/arch/tile/gxio/mpipe.c @@ -0,0 +1,578 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Implementation of mpipe gxio calls. + */ + +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <gxio/iorpc_globals.h> +#include <gxio/iorpc_mpipe.h> +#include <gxio/iorpc_mpipe_info.h> +#include <gxio/kiorpc.h> +#include <gxio/mpipe.h> + +/* HACK: Avoid pointless "shadow" warnings. 
*/ +#define link link_shadow + +int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index) +{ + char file[32]; + + int fd; + int i; + + if (mpipe_index >= GXIO_MPIPE_INSTANCE_MAX) + return -EINVAL; + + snprintf(file, sizeof(file), "mpipe/%d/iorpc", mpipe_index); + fd = hv_dev_open((HV_VirtAddr) file, 0); + + context->fd = fd; + + if (fd < 0) { + if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX) + return fd; + else + return -ENODEV; + } + + /* Map in the MMIO space. */ + context->mmio_cfg_base = (void __force *) + iorpc_ioremap(fd, HV_MPIPE_CONFIG_MMIO_OFFSET, + HV_MPIPE_CONFIG_MMIO_SIZE); + if (context->mmio_cfg_base == NULL) + goto cfg_failed; + + context->mmio_fast_base = (void __force *) + iorpc_ioremap(fd, HV_MPIPE_FAST_MMIO_OFFSET, + HV_MPIPE_FAST_MMIO_SIZE); + if (context->mmio_fast_base == NULL) + goto fast_failed; + + /* Initialize the stacks. */ + for (i = 0; i < 8; i++) + context->__stacks.stacks[i] = 255; + + context->instance = mpipe_index; + + return 0; + + fast_failed: + iounmap((void __force __iomem *)(context->mmio_cfg_base)); + cfg_failed: + hv_dev_close(context->fd); + context->fd = -1; + return -ENODEV; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_init); + +int gxio_mpipe_destroy(gxio_mpipe_context_t *context) +{ + iounmap((void __force __iomem *)(context->mmio_cfg_base)); + iounmap((void __force __iomem *)(context->mmio_fast_base)); + return hv_dev_close(context->fd); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_destroy); + +static int16_t gxio_mpipe_buffer_sizes[8] = + { 128, 256, 512, 1024, 1664, 4096, 10368, 16384 }; + +gxio_mpipe_buffer_size_enum_t gxio_mpipe_buffer_size_to_buffer_size_enum(size_t + size) +{ + int i; + for (i = 0; i < 7; i++) + if (size <= gxio_mpipe_buffer_sizes[i]) + break; + return i; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_buffer_size_to_buffer_size_enum); + +size_t gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t + buffer_size_enum) +{ + if (buffer_size_enum > 7) + buffer_size_enum = 7; + + return 
gxio_mpipe_buffer_sizes[buffer_size_enum]; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_buffer_size_enum_to_buffer_size); + +size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers) +{ + const int BUFFERS_PER_LINE = 12; + + /* Count the number of cachlines. */ + unsigned long lines = + (buffers + BUFFERS_PER_LINE - 1) / BUFFERS_PER_LINE; + + /* Convert to bytes. */ + return lines * CHIP_L2_LINE_SIZE(); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_calc_buffer_stack_bytes); + +int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context, + unsigned int stack, + gxio_mpipe_buffer_size_enum_t + buffer_size_enum, void *mem, size_t mem_size, + unsigned int mem_flags) +{ + int result; + + memset(mem, 0, mem_size); + + result = gxio_mpipe_init_buffer_stack_aux(context, mem, mem_size, + mem_flags, stack, + buffer_size_enum); + if (result < 0) + return result; + + /* Save the stack. */ + context->__stacks.stacks[buffer_size_enum] = stack; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_init_buffer_stack); + +int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, + unsigned int mem_flags) +{ + return gxio_mpipe_init_notif_ring_aux(context, mem, mem_size, + mem_flags, ring); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_init_notif_ring); + +int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t *context, + unsigned int group, + unsigned int ring, + unsigned int num_rings, + unsigned int bucket, + unsigned int num_buckets, + gxio_mpipe_bucket_mode_t mode) +{ + int i; + int result; + + gxio_mpipe_bucket_info_t bucket_info = { { + .group = group, + .mode = mode, + } + }; + + gxio_mpipe_notif_group_bits_t bits = { {0} }; + + for (i = 0; i < num_rings; i++) + gxio_mpipe_notif_group_add_ring(&bits, ring + i); + + result = gxio_mpipe_init_notif_group(context, group, bits); + if (result != 0) + return result; + + for (i = 0; i < num_buckets; i++) { + bucket_info.notifring = ring + (i % num_rings); + + result = 
gxio_mpipe_init_bucket(context, bucket + i, + bucket_info); + if (result != 0) + return result; + } + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_init_notif_group_and_buckets); + +int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context, + unsigned int ring, unsigned int channel, + void *mem, size_t mem_size, + unsigned int mem_flags) +{ + memset(mem, 0, mem_size); + + return gxio_mpipe_init_edma_ring_aux(context, mem, mem_size, mem_flags, + ring, channel); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_init_edma_ring); + +void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules, + gxio_mpipe_context_t *context) +{ + rules->context = context; + memset(&rules->list, 0, sizeof(rules->list)); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_rules_init); + +int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules, + unsigned int bucket, unsigned int num_buckets, + gxio_mpipe_rules_stacks_t *stacks) +{ + int i; + int stack = 255; + + gxio_mpipe_rules_list_t *list = &rules->list; + + /* Current rule. */ + gxio_mpipe_rules_rule_t *rule = + (gxio_mpipe_rules_rule_t *) (list->rules + list->head); + + unsigned int head = list->tail; + + /* + * Align next rule properly. + *Note that "dmacs_and_vlans" will also be aligned. + */ + unsigned int pad = 0; + while (((head + pad) % __alignof__(gxio_mpipe_rules_rule_t)) != 0) + pad++; + + /* + * Verify room. + * ISSUE: Mark rules as broken on error? + */ + if (head + pad + sizeof(*rule) >= sizeof(list->rules)) + return GXIO_MPIPE_ERR_RULES_FULL; + + /* Verify num_buckets is a power of 2. */ + if (__builtin_popcount(num_buckets) != 1) + return GXIO_MPIPE_ERR_RULES_INVALID; + + /* Add padding to previous rule. */ + rule->size += pad; + + /* Start a new rule. */ + list->head = head + pad; + + rule = (gxio_mpipe_rules_rule_t *) (list->rules + list->head); + + /* Default some values. */ + rule->headroom = 2; + rule->tailroom = 0; + rule->capacity = 16384; + + /* Save the bucket info. 
*/ + rule->bucket_mask = num_buckets - 1; + rule->bucket_first = bucket; + + for (i = 8 - 1; i >= 0; i--) { + int maybe = + stacks ? stacks->stacks[i] : rules->context->__stacks. + stacks[i]; + if (maybe != 255) + stack = maybe; + rule->stacks.stacks[i] = stack; + } + + if (stack == 255) + return GXIO_MPIPE_ERR_RULES_INVALID; + + /* NOTE: Only entries at the end of the array can be 255. */ + for (i = 8 - 1; i > 0; i--) { + if (rule->stacks.stacks[i] == 255) { + rule->stacks.stacks[i] = stack; + rule->capacity = + gxio_mpipe_buffer_size_enum_to_buffer_size(i - + 1); + } + } + + rule->size = sizeof(*rule); + list->tail = list->head + rule->size; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_rules_begin); + +int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules, + unsigned int channel) +{ + gxio_mpipe_rules_list_t *list = &rules->list; + + gxio_mpipe_rules_rule_t *rule = + (gxio_mpipe_rules_rule_t *) (list->rules + list->head); + + /* Verify channel. */ + if (channel >= 32) + return GXIO_MPIPE_ERR_RULES_INVALID; + + /* Verify begun. */ + if (list->tail == 0) + return GXIO_MPIPE_ERR_RULES_EMPTY; + + rule->channel_bits |= (1UL << channel); + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_rules_add_channel); + +int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules, uint8_t headroom) +{ + gxio_mpipe_rules_list_t *list = &rules->list; + + gxio_mpipe_rules_rule_t *rule = + (gxio_mpipe_rules_rule_t *) (list->rules + list->head); + + /* Verify begun. 
*/ + if (list->tail == 0) + return GXIO_MPIPE_ERR_RULES_EMPTY; + + rule->headroom = headroom; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_rules_set_headroom); + +int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules) +{ + gxio_mpipe_rules_list_t *list = &rules->list; + unsigned int size = + offsetof(gxio_mpipe_rules_list_t, rules) + list->tail; + return gxio_mpipe_commit_rules(rules->context, list, size); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_rules_commit); + +int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, unsigned int mem_flags) +{ + /* The init call below will verify that "mem_size" is legal. */ + unsigned int num_entries = mem_size / sizeof(gxio_mpipe_idesc_t); + + iqueue->context = context; + iqueue->idescs = (gxio_mpipe_idesc_t *)mem; + iqueue->ring = ring; + iqueue->num_entries = num_entries; + iqueue->mask_num_entries = num_entries - 1; + iqueue->log2_num_entries = __builtin_ctz(num_entries); + iqueue->head = 1; +#ifdef __BIG_ENDIAN__ + iqueue->swapped = 0; +#endif + + /* Initialize the "tail". */ + __gxio_mmio_write(mem, iqueue->head); + + return gxio_mpipe_init_notif_ring(context, ring, mem, mem_size, + mem_flags); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_iqueue_init); + +int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_context_t *context, + unsigned int ering, + unsigned int channel, + void *mem, unsigned int mem_size, + unsigned int mem_flags) +{ + /* The init call below will verify that "mem_size" is legal. */ + unsigned int num_entries = mem_size / sizeof(gxio_mpipe_edesc_t); + + /* Offset used to read number of completed commands. 
*/ + MPIPE_EDMA_POST_REGION_ADDR_t offset; + + int result = gxio_mpipe_init_edma_ring(context, ering, channel, + mem, mem_size, mem_flags); + if (result < 0) + return result; + + memset(equeue, 0, sizeof(*equeue)); + + offset.word = 0; + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_EDMA - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.ring = ering; + + __gxio_dma_queue_init(&equeue->dma_queue, + context->mmio_fast_base + offset.word, + num_entries); + equeue->edescs = mem; + equeue->mask_num_entries = num_entries - 1; + equeue->log2_num_entries = __builtin_ctz(num_entries); + equeue->context = context; + equeue->ering = ering; + equeue->channel = channel; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_equeue_init); + +int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context, + const struct timespec *ts) +{ + cycles_t cycles = get_cycles(); + return gxio_mpipe_set_timestamp_aux(context, (uint64_t)ts->tv_sec, + (uint64_t)ts->tv_nsec, + (uint64_t)cycles); +} + +int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context, + struct timespec *ts) +{ + int ret; + cycles_t cycles_prev, cycles_now, clock_rate; + cycles_prev = get_cycles(); + ret = gxio_mpipe_get_timestamp_aux(context, (uint64_t *)&ts->tv_sec, + (uint64_t *)&ts->tv_nsec, + (uint64_t *)&cycles_now); + if (ret < 0) { + return ret; + } + + clock_rate = get_clock_rate(); + ts->tv_nsec -= (cycles_now - cycles_prev) * 1000000000LL / clock_rate; + if (ts->tv_nsec < 0) { + ts->tv_nsec += 1000000000LL; + ts->tv_sec -= 1; + } + return ret; +} + +int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, int64_t delta) +{ + return gxio_mpipe_adjust_timestamp_aux(context, delta); +} + +/* Get our internal context used for link name access. This context is + * special in that it is not associated with an mPIPE service domain. 
+ */ +static gxio_mpipe_context_t *_gxio_get_link_context(void) +{ + static gxio_mpipe_context_t context; + static gxio_mpipe_context_t *contextp; + static int tried_open = 0; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + + if (!tried_open) { + int i = 0; + tried_open = 1; + + /* + * "4" here is the maximum possible number of mPIPE shims; it's + * an exaggeration but we shouldn't ever go beyond 2 anyway. + */ + for (i = 0; i < 4; i++) { + char file[80]; + + snprintf(file, sizeof(file), "mpipe/%d/iorpc_info", i); + context.fd = hv_dev_open((HV_VirtAddr) file, 0); + if (context.fd < 0) + continue; + + contextp = &context; + break; + } + } + + mutex_unlock(&mutex); + + return contextp; +} + +int gxio_mpipe_link_instance(const char *link_name) +{ + _gxio_mpipe_link_name_t name; + gxio_mpipe_context_t *context = _gxio_get_link_context(); + + if (!context) + return GXIO_ERR_NO_DEVICE; + + strncpy(name.name, link_name, sizeof(name.name)); + name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0'; + + return gxio_mpipe_info_instance_aux(context, name); +} + +int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac) +{ + int rv; + _gxio_mpipe_link_name_t name; + _gxio_mpipe_link_mac_t mac; + + gxio_mpipe_context_t *context = _gxio_get_link_context(); + if (!context) + return GXIO_ERR_NO_DEVICE; + + rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac); + if (rv >= 0) { + strncpy(link_name, name.name, sizeof(name.name)); + memcpy(link_mac, mac.mac, sizeof(mac.mac)); + } + + return rv; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_link_enumerate_mac); + +int gxio_mpipe_link_open(gxio_mpipe_link_t *link, + gxio_mpipe_context_t *context, const char *link_name, + unsigned int flags) +{ + _gxio_mpipe_link_name_t name; + int rv; + + strncpy(name.name, link_name, sizeof(name.name)); + name.name[GXIO_MPIPE_LINK_NAME_LEN - 1] = '\0'; + + rv = gxio_mpipe_link_open_aux(context, name, flags); + if (rv < 0) + return rv; + + link->context = context; + link->channel = rv 
>> 8; + link->mac = rv & 0xFF; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_link_open); + +int gxio_mpipe_link_close(gxio_mpipe_link_t *link) +{ + return gxio_mpipe_link_close_aux(link->context, link->mac); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_link_close); + +int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr, + int64_t val) +{ + return gxio_mpipe_link_set_attr_aux(link->context, link->mac, attr, + val); +} + +EXPORT_SYMBOL_GPL(gxio_mpipe_link_set_attr); diff --git a/arch/tile/gxio/trio.c b/arch/tile/gxio/trio.c new file mode 100644 index 00000000000..69f0b8df3ce --- /dev/null +++ b/arch/tile/gxio/trio.c @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Implementation of trio gxio calls. 
+ */ + +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <gxio/trio.h> +#include <gxio/iorpc_globals.h> +#include <gxio/iorpc_trio.h> +#include <gxio/kiorpc.h> + +int gxio_trio_init(gxio_trio_context_t *context, unsigned int trio_index) +{ + char file[32]; + int fd; + + snprintf(file, sizeof(file), "trio/%d/iorpc", trio_index); + fd = hv_dev_open((HV_VirtAddr) file, 0); + if (fd < 0) { + context->fd = -1; + + if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX) + return fd; + else + return -ENODEV; + } + + context->fd = fd; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_trio_init); diff --git a/arch/tile/gxio/uart.c b/arch/tile/gxio/uart.c new file mode 100644 index 00000000000..ba585175ef8 --- /dev/null +++ b/arch/tile/gxio/uart.c @@ -0,0 +1,87 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Implementation of UART gxio calls. + */ + +#include <linux/io.h> +#include <linux/errno.h> +#include <linux/module.h> + +#include <gxio/uart.h> +#include <gxio/iorpc_globals.h> +#include <gxio/iorpc_uart.h> +#include <gxio/kiorpc.h> + +int gxio_uart_init(gxio_uart_context_t *context, int uart_index) +{ + char file[32]; + int fd; + + snprintf(file, sizeof(file), "uart/%d/iorpc", uart_index); + fd = hv_dev_open((HV_VirtAddr) file, 0); + if (fd < 0) { + if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX) + return fd; + else + return -ENODEV; + } + + context->fd = fd; + + /* Map in the MMIO space. 
*/ + context->mmio_base = (void __force *) + iorpc_ioremap(fd, HV_UART_MMIO_OFFSET, HV_UART_MMIO_SIZE); + + if (context->mmio_base == NULL) { + hv_dev_close(context->fd); + context->fd = -1; + return -ENODEV; + } + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_uart_init); + +int gxio_uart_destroy(gxio_uart_context_t *context) +{ + iounmap((void __force __iomem *)(context->mmio_base)); + hv_dev_close(context->fd); + + context->mmio_base = NULL; + context->fd = -1; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_uart_destroy); + +/* UART register write wrapper. */ +void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset, + uint64_t word) +{ + __gxio_mmio_write(context->mmio_base + offset, word); +} + +EXPORT_SYMBOL_GPL(gxio_uart_write); + +/* UART register read wrapper. */ +uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset) +{ + return __gxio_mmio_read(context->mmio_base + offset); +} + +EXPORT_SYMBOL_GPL(gxio_uart_read); diff --git a/arch/tile/gxio/usb_host.c b/arch/tile/gxio/usb_host.c new file mode 100644 index 00000000000..785afad7922 --- /dev/null +++ b/arch/tile/gxio/usb_host.c @@ -0,0 +1,91 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * + * Implementation of USB gxio calls. 
+ */ + +#include <linux/io.h> +#include <linux/errno.h> +#include <linux/module.h> + +#include <gxio/iorpc_globals.h> +#include <gxio/iorpc_usb_host.h> +#include <gxio/kiorpc.h> +#include <gxio/usb_host.h> + +/* + * Open the hypervisor device "usb_host/<usb_index>/iorpc/ehci" (is_ehci + * non-zero) or "usb_host/<usb_index>/iorpc/ohci" (is_ehci zero), store its + * descriptor in context->fd and map HV_USB_HOST_MMIO_SIZE bytes of MMIO + * space into context->mmio_base. Returns 0 on success. On failure + * context->fd is set to -1; the result is the hv_dev_open() value when it + * lies in [GXIO_ERR_MIN, GXIO_ERR_MAX], otherwise -ENODEV. + */ +int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, + int is_ehci) +{ + char file[32]; + int fd; + + if (is_ehci) + snprintf(file, sizeof(file), "usb_host/%d/iorpc/ehci", + usb_index); + else + snprintf(file, sizeof(file), "usb_host/%d/iorpc/ohci", + usb_index); + + fd = hv_dev_open((HV_VirtAddr) file, 0); + if (fd < 0) { + /* As in gxio_trio_init(), mark the context as having no device. */ + context->fd = -1; + + if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX) + return fd; + else + return -ENODEV; + } + + context->fd = fd; + + /* Map in the MMIO space. */ + context->mmio_base = + (void __force *)iorpc_ioremap(fd, 0, HV_USB_HOST_MMIO_SIZE); + + if (context->mmio_base == NULL) { + hv_dev_close(context->fd); + /* + * The descriptor is closed; do not leave its stale value + * behind (gxio_uart_init() resets it the same way). + */ + context->fd = -1; + return -ENODEV; + } + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_usb_host_init); + +/* + * Unmap context->mmio_base, close context->fd and reset both fields + * (NULL and -1). Always returns 0. + */ +int gxio_usb_host_destroy(gxio_usb_host_context_t *context) +{ + iounmap((void __force __iomem *)(context->mmio_base)); + hv_dev_close(context->fd); + + context->mmio_base = NULL; + context->fd = -1; + + return 0; +} + +EXPORT_SYMBOL_GPL(gxio_usb_host_destroy); + +/* Return the base of the MMIO mapping stored in the context. */ +void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context) +{ + return context->mmio_base; +} + +EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start); + +/* Return the MMIO mapping length; it is the constant HV_USB_HOST_MMIO_SIZE. */ +size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context) +{ + return HV_USB_HOST_MMIO_SIZE; +} + +EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_len); diff --git a/arch/tile/include/arch/Kbuild b/arch/tile/include/arch/Kbuild new file mode 100644 index 00000000000..3751c9fabcf --- /dev/null +++ b/arch/tile/include/arch/Kbuild @@ -0,0 +1 @@ +# Tile arch headers diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h deleted file mode 100644 index 96b5710505b..00000000000 --- a/arch/tile/include/arch/interrupts_32.h +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation.
All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef __ARCH_INTERRUPTS_H__ -#define __ARCH_INTERRUPTS_H__ - -/** Mask for an interrupt. */ -/* Note: must handle breaking interrupts into high and low words manually. */ -#define INT_MASK_LO(intno) (1 << (intno)) -#define INT_MASK_HI(intno) (1 << ((intno) - 32)) - -#ifndef __ASSEMBLER__ -#define INT_MASK(intno) (1ULL << (intno)) -#endif - - -/** Where a given interrupt executes */ -#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) - -/** Where to store a vector for a given interrupt. */ -#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) - -/** The base address of user-level interrupts. */ -#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) - - -/** Additional synthetic interrupt. 
*/ -#define INT_BREAKPOINT (63) - -#define INT_ITLB_MISS 0 -#define INT_MEM_ERROR 1 -#define INT_ILL 2 -#define INT_GPV 3 -#define INT_SN_ACCESS 4 -#define INT_IDN_ACCESS 5 -#define INT_UDN_ACCESS 6 -#define INT_IDN_REFILL 7 -#define INT_UDN_REFILL 8 -#define INT_IDN_COMPLETE 9 -#define INT_UDN_COMPLETE 10 -#define INT_SWINT_3 11 -#define INT_SWINT_2 12 -#define INT_SWINT_1 13 -#define INT_SWINT_0 14 -#define INT_UNALIGN_DATA 15 -#define INT_DTLB_MISS 16 -#define INT_DTLB_ACCESS 17 -#define INT_DMATLB_MISS 18 -#define INT_DMATLB_ACCESS 19 -#define INT_SNITLB_MISS 20 -#define INT_SN_NOTIFY 21 -#define INT_SN_FIREWALL 22 -#define INT_IDN_FIREWALL 23 -#define INT_UDN_FIREWALL 24 -#define INT_TILE_TIMER 25 -#define INT_IDN_TIMER 26 -#define INT_UDN_TIMER 27 -#define INT_DMA_NOTIFY 28 -#define INT_IDN_CA 29 -#define INT_UDN_CA 30 -#define INT_IDN_AVAIL 31 -#define INT_UDN_AVAIL 32 -#define INT_PERF_COUNT 33 -#define INT_INTCTRL_3 34 -#define INT_INTCTRL_2 35 -#define INT_INTCTRL_1 36 -#define INT_INTCTRL_0 37 -#define INT_BOOT_ACCESS 38 -#define INT_WORLD_ACCESS 39 -#define INT_I_ASID 40 -#define INT_D_ASID 41 -#define INT_DMA_ASID 42 -#define INT_SNI_ASID 43 -#define INT_DMA_CPL 44 -#define INT_SN_CPL 45 -#define INT_DOUBLE_FAULT 46 -#define INT_SN_STATIC_ACCESS 47 -#define INT_AUX_PERF_COUNT 48 - -#define NUM_INTERRUPTS 49 - -#ifndef __ASSEMBLER__ -#define QUEUED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - 
INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define NONQUEUED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define CRITICAL_MASKED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define CRITICAL_UNMASKED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - 
INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define MASKABLE_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#define UNMASKABLE_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - 
INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define SYNC_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ - 0) -#define NON_SYNC_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - 0) -#endif /* !__ASSEMBLER__ */ -#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/arch/mpipe.h b/arch/tile/include/arch/mpipe.h new file mode 100644 index 00000000000..904538e754d --- /dev/null +++ b/arch/tile/include/arch/mpipe.h @@ -0,0 +1,371 @@ +/* + * Copyright 2012 Tilera Corporation. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_MPIPE_H__ +#define __ARCH_MPIPE_H__ + +#include <arch/abi.h> +#include <arch/mpipe_def.h> + +#ifndef __ASSEMBLER__ + +/* + * MMIO Ingress DMA Release Region Address. + * This is a description of the physical addresses used to manipulate ingress + * credit counters. Accesses to this address space should use an address of + * this form and a value like that specified in IDMA_RELEASE_REGION_VAL. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* NotifRing to be released */ + uint_reg_t ring : 8; + /* Bucket to be released */ + uint_reg_t bucket : 13; + /* Enable NotifRing release */ + uint_reg_t ring_enable : 1; + /* Enable Bucket release */ + uint_reg_t bucket_enable : 1; + /* + * This field of the address selects the region (address space) to be + * accessed. For the iDMA release region, this field must be 4. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_1 : 6; + /* This field of the address indexes the 32 entry service domain table. */ + uint_reg_t svc_dom : 5; + /* Reserved. 
*/ + uint_reg_t __reserved_2 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_1 : 6; + uint_reg_t region : 3; + uint_reg_t bucket_enable : 1; + uint_reg_t ring_enable : 1; + uint_reg_t bucket : 13; + uint_reg_t ring : 8; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_IDMA_RELEASE_REGION_ADDR_t; + +/* + * MMIO Ingress DMA Release Region Value - Release NotifRing and/or Bucket. + * Provides release of the associated NotifRing. The address of the MMIO + * operation is described in IDMA_RELEASE_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Number of packets being released. The load balancer's count of + * inflight packets will be decremented by this amount for the associated + * Bucket and/or NotifRing + */ + uint_reg_t count : 16; + /* Reserved. */ + uint_reg_t __reserved : 48; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 48; + uint_reg_t count : 16; +#endif + }; + + uint_reg_t word; +} MPIPE_IDMA_RELEASE_REGION_VAL_t; + +/* + * MMIO Buffer Stack Manager Region Address. + * This MMIO region is used for posting or fetching buffers to/from the + * buffer stack manager. On an MMIO load, this pops a buffer descriptor from + * the top of stack if one is available. On an MMIO store, this pushes a + * buffer to the stack. The value read or written is described in + * BSM_REGION_VAL. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* BufferStack being accessed. */ + uint_reg_t stack : 5; + /* Reserved. */ + uint_reg_t __reserved_1 : 18; + /* + * This field of the address selects the region (address space) to be + * accessed. For the buffer stack manager region, this field must be 6. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 6; + /* This field of the address indexes the 32 entry service domain table. 
*/ + uint_reg_t svc_dom : 5; + /* Reserved. */ + uint_reg_t __reserved_3 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_2 : 6; + uint_reg_t region : 3; + uint_reg_t __reserved_1 : 18; + uint_reg_t stack : 5; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_BSM_REGION_ADDR_t; + +/* + * MMIO Buffer Stack Manager Region Value. + * This MMIO region is used for posting or fetching buffers to/from the + * buffer stack manager. On an MMIO load, this pops a buffer descriptor from + * the top of stack if one is available. On an MMIO store, this pushes a + * buffer to the stack. The address of the MMIO operation is described in + * BSM_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* + * Base virtual address of the buffer. Must be sign extended by consumer. + */ + int_reg_t va : 35; + /* Reserved. */ + uint_reg_t __reserved_1 : 6; + /* + * Index of the buffer stack to which this buffer belongs. Ignored on + * writes since the offset bits specify the stack being accessed. + */ + uint_reg_t stack_idx : 5; + /* Reserved. */ + uint_reg_t __reserved_2 : 3; + /* + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. This field is ignored on writes. + */ + uint_reg_t inst : 2; + /* + * Reads as one to indicate that this is a hardware managed buffer. + * Ignored on writes since all buffers on a given stack are the same size. 
+ */ + uint_reg_t hwb : 1; + /* + * Encoded size of buffer (ignored on writes): + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /* + * Valid indication for the buffer. Ignored on writes. + * 0 : Valid buffer descriptor popped from stack. + * 3 : Could not pop a buffer from the stack. Either the stack is empty, + * or the hardware's prefetch buffer is empty for this stack. + */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_2 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_1 : 6; + int_reg_t va : 35; + uint_reg_t __reserved_0 : 7; +#endif + }; + + uint_reg_t word; +} MPIPE_BSM_REGION_VAL_t; + +/* + * MMIO Egress DMA Post Region Address. + * Used to post descriptor locations to the eDMA descriptor engine. The + * value to be written is described in EDMA_POST_REGION_VAL + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* eDMA ring being accessed */ + uint_reg_t ring : 6; + /* Reserved. */ + uint_reg_t __reserved_1 : 17; + /* + * This field of the address selects the region (address space) to be + * accessed. For the egress DMA post region, this field must be 5. + */ + uint_reg_t region : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 6; + /* This field of the address indexes the 32 entry service domain table. */ + uint_reg_t svc_dom : 5; + /* Reserved. */ + uint_reg_t __reserved_3 : 24; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 24; + uint_reg_t svc_dom : 5; + uint_reg_t __reserved_2 : 6; + uint_reg_t region : 3; + uint_reg_t __reserved_1 : 17; + uint_reg_t ring : 6; + uint_reg_t __reserved_0 : 3; +#endif + }; + + uint_reg_t word; +} MPIPE_EDMA_POST_REGION_ADDR_t; + +/* + * MMIO Egress DMA Post Region Value. 
+ * Used to post descriptor locations to the eDMA descriptor engine. The + * address is described in EDMA_POST_REGION_ADDR. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * For writes, this specifies the current ring tail pointer prior to any + * post. For example, to post 1 or more descriptors starting at location + * 23, this would contain 23 (not 24). On writes, this index must be + * masked based on the ring size. The new tail pointer after this post + * is COUNT+RING_IDX (masked by the ring size). + * + * For reads, this provides the hardware descriptor fetcher's head + * pointer. The descriptors prior to the head pointer, however, may not + * yet have been processed so this indicator is only used to determine + * how full the ring is and if software may post more descriptors. + */ + uint_reg_t ring_idx : 16; + /* + * For writes, this specifies the number of contiguous descriptors that are + * being posted. Software may post up to RingSize descriptors with a + * single MMIO store. A zero in this field on a write will "wake up" an + * eDMA ring and cause it to fetch descriptors regardless of the hardware's + * current view of the state of the tail pointer. + * + * For reads, this field provides a rolling count of the number of + * descriptors that have been completely processed. This may be used by + * software to determine when buffers associated with a descriptor may be + * returned or reused. When the ring's flush bit is cleared by software + * (after having been set by HW or SW), the COUNT will be cleared. + */ + uint_reg_t count : 16; + /* + * For writes, this specifies the generation number of the tail being + * posted. Note that if tail+cnt wraps to the beginning of the ring, the + * eDMA hardware assumes that the descriptors posted at the beginning of + * the ring are also valid so it is okay to post around the wrap point. + * + * For reads, this is the current generation number.
Valid descriptors + * will have the inverse of this generation number. + */ + uint_reg_t gen : 1; + /* Reserved. */ + uint_reg_t __reserved : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 31; + uint_reg_t gen : 1; + uint_reg_t count : 16; + uint_reg_t ring_idx : 16; +#endif + }; + + uint_reg_t word; +} MPIPE_EDMA_POST_REGION_VAL_t; + +/* + * Load Balancer Bucket Status Data. + * Read/Write data for load balancer Bucket-Status Table. 4160 entries + * indexed by LBL_INIT_CTL.IDX when LBL_INIT_CTL.STRUCT_SEL is BSTS_TBL + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* NotifRing currently assigned to this bucket. */ + uint_reg_t notifring : 8; + /* Current reference count. */ + uint_reg_t count : 16; + /* Group associated with this bucket. */ + uint_reg_t group : 5; + /* Mode select for this bucket. */ + uint_reg_t mode : 3; + /* Reserved. */ + uint_reg_t __reserved : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 32; + uint_reg_t mode : 3; + uint_reg_t group : 5; + uint_reg_t count : 16; + uint_reg_t notifring : 8; +#endif + }; + + uint_reg_t word; +} MPIPE_LBL_INIT_DAT_BSTS_TBL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_MPIPE_H__) */ diff --git a/arch/tile/include/arch/mpipe_constants.h b/arch/tile/include/arch/mpipe_constants.h new file mode 100644 index 00000000000..84022ac5fe8 --- /dev/null +++ b/arch/tile/include/arch/mpipe_constants.h @@ -0,0 +1,42 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. 
See the GNU General Public License for + * more details. + */ + + +#ifndef __ARCH_MPIPE_CONSTANTS_H__ +#define __ARCH_MPIPE_CONSTANTS_H__ + +#define MPIPE_NUM_CLASSIFIERS 16 +#define MPIPE_CLS_MHZ 1200 + +#define MPIPE_NUM_EDMA_RINGS 64 + +#define MPIPE_NUM_SGMII_MACS 16 +#define MPIPE_NUM_XAUI_MACS 16 +#define MPIPE_NUM_LOOPBACK_CHANNELS 4 +#define MPIPE_NUM_NON_LB_CHANNELS 28 + +#define MPIPE_NUM_IPKT_BLOCKS 1536 + +#define MPIPE_NUM_BUCKETS 4160 + +#define MPIPE_NUM_NOTIF_RINGS 256 + +#define MPIPE_NUM_NOTIF_GROUPS 32 + +#define MPIPE_NUM_TLBS_PER_ASID 16 +#define MPIPE_TLB_IDX_WIDTH 4 + +#define MPIPE_MMIO_NUM_SVC_DOM 32 + +#endif /* __ARCH_MPIPE_CONSTANTS_H__ */ diff --git a/arch/tile/include/arch/mpipe_def.h b/arch/tile/include/arch/mpipe_def.h new file mode 100644 index 00000000000..c3d30217fc6 --- /dev/null +++ b/arch/tile/include/arch/mpipe_def.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_MPIPE_DEF_H__ +#define __ARCH_MPIPE_DEF_H__ +#define MPIPE_MMIO_ADDR__REGION_SHIFT 26 +#define MPIPE_MMIO_ADDR__REGION_VAL_CFG 0x0 +#define MPIPE_MMIO_ADDR__REGION_VAL_IDMA 0x4 +#define MPIPE_MMIO_ADDR__REGION_VAL_EDMA 0x5 +#define MPIPE_MMIO_ADDR__REGION_VAL_BSM 0x6 +#define MPIPE_BSM_REGION_VAL__VA_SHIFT 7 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128 0x0 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256 0x1 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512 0x2 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024 0x3 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664 0x4 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096 0x5 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368 0x6 +#define MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384 0x7 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA 0x0 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED 0x1 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK 0x2 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY 0x3 +#define MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND 0x7 +#define MPIPE_LBL_NR_STATE__FIRST_WORD 0x2138 +#endif /* !defined(__ARCH_MPIPE_DEF_H__) */ diff --git a/arch/tile/include/arch/mpipe_shm.h b/arch/tile/include/arch/mpipe_shm.h new file mode 100644 index 00000000000..13b3c4300e5 --- /dev/null +++ b/arch/tile/include/arch/mpipe_shm.h @@ -0,0 +1,521 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + + +#ifndef __ARCH_MPIPE_SHM_H__ +#define __ARCH_MPIPE_SHM_H__ + +#include <arch/abi.h> +#include <arch/mpipe_shm_def.h> + +#ifndef __ASSEMBLER__ +/** + * MPIPE eDMA Descriptor. + * The eDMA descriptor is written by software and consumed by hardware. It + * is used to specify the location of egress packet data to be sent out of + * the chip via one of the packet interfaces. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** + * Generation number. Used to indicate a valid descriptor in ring. When + * a new descriptor is written into the ring, software must toggle this + * bit. The net effect is that the GEN bit being written into new + * descriptors toggles each time the ring tail pointer wraps. + */ + uint_reg_t gen : 1; + /** + * For devices with EDMA reorder support, this field allows the + * descriptor to select the egress FIFO. The associated DMA ring must + * have ALLOW_EFIFO_SEL enabled. + */ + uint_reg_t efifo_sel : 6; + /** Reserved. Must be zero. */ + uint_reg_t r0 : 1; + /** Checksum generation enabled for this transfer. */ + uint_reg_t csum : 1; + /** + * Nothing to be sent. Used, for example, when software has dropped a + * packet but still wishes to return all of the associated buffers. + */ + uint_reg_t ns : 1; + /** + * Notification interrupt will be delivered when packet has been egressed. + */ + uint_reg_t notif : 1; + /** + * Boundary indicator. When 1, this transfer includes the EOP for this + * command. Must be clear on all but the last descriptor for an egress + * packet. + */ + uint_reg_t bound : 1; + /** Reserved. Must be zero. */ + uint_reg_t r1 : 4; + /** + * Number of bytes to be sent for this descriptor. When zero, no data + * will be moved and the buffer descriptor will be ignored. If the + * buffer descriptor indicates that it is chained, the low 7 bits of the + * VA indicate the offset within the first buffer (e.g. 127 bytes is the + * maximum offset into the first buffer). 
If the size exceeds a single + * buffer, subsequent buffer descriptors will be fetched prior to + * processing the next eDMA descriptor in the ring. + */ + uint_reg_t xfer_size : 14; + /** Reserved. Must be zero. */ + uint_reg_t r2 : 2; + /** + * Destination of checksum relative to CSUM_START relative to the first + * byte moved by this descriptor. Must be zero if CSUM=0 in this + * descriptor. Must be less than XFER_SIZE (e.g. the first byte of the + * CSUM_DEST must be within the span of this descriptor). + */ + uint_reg_t csum_dest : 8; + /** + * Start byte of checksum relative to the first byte moved by this + * descriptor. If this is not the first descriptor for the egress + * packet, CSUM_START is still relative to the first byte in this + * descriptor. Must be zero if CSUM=0 in this descriptor. + */ + uint_reg_t csum_start : 8; + /** + * Initial value for 16-bit 1's complement checksum if enabled via CSUM. + * Specified in network order. That is, bits[7:0] will be added to the + * byte pointed to by CSUM_START and bits[15:8] will be added to the byte + * pointed to by CSUM_START+1 (with appropriate 1's complement carries). + * Must be zero if CSUM=0 in this descriptor. + */ + uint_reg_t csum_seed : 16; +#else /* __BIG_ENDIAN__ */ + uint_reg_t csum_seed : 16; + uint_reg_t csum_start : 8; + uint_reg_t csum_dest : 8; + uint_reg_t r2 : 2; + uint_reg_t xfer_size : 14; + uint_reg_t r1 : 4; + uint_reg_t bound : 1; + uint_reg_t notif : 1; + uint_reg_t ns : 1; + uint_reg_t csum : 1; + uint_reg_t r0 : 1; + uint_reg_t efifo_sel : 6; + uint_reg_t gen : 1; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** Virtual address. Must be sign extended by consumer. */ + int_reg_t va : 42; + /** Reserved. */ + uint_reg_t __reserved_0 : 6; + /** Index of the buffer stack to which this buffer belongs. */ + uint_reg_t stack_idx : 5; + /** Reserved. */ + uint_reg_t __reserved_1 : 3; + /** + * Instance ID.
For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. + */ + uint_reg_t inst : 2; + /** + * Always set to one by hardware in iDMA packet descriptors. For eDMA, + * indicates whether the buffer will be released to the buffer stack + * manager. When 0, software is responsible for releasing the buffer. + */ + uint_reg_t hwb : 1; + /** + * Encoded size of buffer. Set by the ingress hardware for iDMA packet + * descriptors. For eDMA descriptors, indicates the buffer size if .c + * indicates a chained packet. If an eDMA descriptor is not chained and + * the .hwb bit is not set, this field is ignored and the size is + * specified by the .xfer_size field. + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /** + * Chaining configuration for the buffer. Indicates that an ingress + * packet or egress command is chained across multiple buffers, with each + * buffer's size indicated by the .size field. + */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_1 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_0 : 6; + int_reg_t va : 42; +#endif + + }; + + /** Word access */ + uint_reg_t words[2]; +} MPIPE_EDMA_DESC_t; + +/** + * MPIPE Packet Descriptor. + * The packet descriptor is filled by the mPIPE's classification, + * load-balancing, and buffer management services. 
Some fields are consumed + * by mPIPE hardware, and others are consumed by Tile software. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** + * Notification ring into which this packet descriptor is written. + * Typically written by load balancer, but can be overridden by + * classification program if NR is asserted. + */ + uint_reg_t notif_ring : 8; + /** Source channel for this packet. Written by mPIPE DMA hardware. */ + uint_reg_t channel : 5; + /** Reserved. */ + uint_reg_t __reserved_0 : 1; + /** + * MAC Error. + * Generated by the MAC interface. Asserted if there was an overrun of + * the MAC's receive FIFO. This condition generally only occurs if the + * mPIPE clock is running too slowly. + */ + uint_reg_t me : 1; + /** + * Truncation Error. + * Written by the iDMA hardware. Asserted if packet was truncated due to + * insufficient space in iPkt buffer + */ + uint_reg_t tr : 1; + /** + * Written by the iDMA hardware. Indicates the number of bytes written + * to Tile memory. In general, this is the actual size of the packet as + * received from the MAC. But if the packet is truncated due to running + * out of buffers or due to the iPkt buffer filling up, then the L2_SIZE + * will be reduced to reflect the actual number of valid bytes written to + * Tile memory. + */ + uint_reg_t l2_size : 14; + /** + * CRC Error. + * Generated by the MAC. Asserted if MAC indicated an L2 CRC error or + * other L2 error (bad length etc.) on the packet. + */ + uint_reg_t ce : 1; + /** + * Cut Through. + * Written by the iDMA hardware. Asserted if packet was not completely + * received before being sent to classifier. L2_Size will indicate + * number of bytes received so far. + */ + uint_reg_t ct : 1; + /** + * Written by the classification program. Used by the load balancer to + * select the ring into which this packet descriptor is written. + */ + uint_reg_t bucket_id : 13; + /** Reserved. 
*/ + uint_reg_t __reserved_1 : 3; + /** + * Checksum. + * Written by classification program. When 1, the checksum engine will + * perform checksum based on the CSUM_SEED, CSUM_START, and CSUM_BYTES + * fields. The result will be placed in CSUM_VAL. + */ + uint_reg_t cs : 1; + /** + * Notification Ring Select. + * Written by the classification program. When 1, the NotifRingIDX is + * set by classification program rather than being set by load balancer. + */ + uint_reg_t nr : 1; + /** + * Written by classification program. Indicates whether packet and + * descriptor should both be dropped, both be delivered, or only the + * descriptor should be delivered. + */ + uint_reg_t dest : 2; + /** + * General Purpose Sequence Number Enable. + * Written by the classification program. When 1, the GP_SQN_SEL field + * contains the sequence number selector and the GP_SQN field will be + * replaced with the associated sequence number. When clear, the GP_SQN + * field is left intact and can be used as "Custom" bytes. + */ + uint_reg_t sq : 1; + /** + * TimeStamp Enable. + * Enable TimeStamp insertion. When clear, timestamp field may be filled + * with custom data by classifier. When set, hardware inserts the + * timestamp when the start of packet is received from the MAC. + */ + uint_reg_t ts : 1; + /** + * Packet Sequence Number Enable. + * Enable PacketSQN insertion. When clear, PacketSQN field may be filled + * with custom data by classifier. When set, hardware inserts the packet + * sequence number when the packet descriptor is written to a + * notification ring. + */ + uint_reg_t ps : 1; + /** + * Buffer Error. + * Written by the iDMA hardware. Asserted if iDMA ran out of buffers + * while writing the packet. Software must still return any buffer + * descriptors whose C field indicates a valid descriptor was consumed. + */ + uint_reg_t be : 1; + /** + * Written by the classification program. The associated counter is + * incremented when the packet is sent.
+ */ + uint_reg_t ctr0 : 5; + /** Reserved. */ + uint_reg_t __reserved_2 : 3; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 3; + uint_reg_t ctr0 : 5; + uint_reg_t be : 1; + uint_reg_t ps : 1; + uint_reg_t ts : 1; + uint_reg_t sq : 1; + uint_reg_t dest : 2; + uint_reg_t nr : 1; + uint_reg_t cs : 1; + uint_reg_t __reserved_1 : 3; + uint_reg_t bucket_id : 13; + uint_reg_t ct : 1; + uint_reg_t ce : 1; + uint_reg_t l2_size : 14; + uint_reg_t tr : 1; + uint_reg_t me : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t channel : 5; + uint_reg_t notif_ring : 8; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. The associated counter is + * incremented when the packet is sent. + */ + uint_reg_t ctr1 : 5; + /** Reserved. */ + uint_reg_t __reserved_3 : 3; + /** + * Written by classification program. Indicates the start byte for + * checksum. Relative to 1st byte received from MAC. + */ + uint_reg_t csum_start : 8; + /** + * Checksum seed written by classification program. Overwritten with + * resultant checksum if CS bit is asserted. The endianness of the CSUM + * value bits when viewed by Tile software match the packet byte order. + * That is, bits[7:0] of the resulting checksum value correspond to + * earlier (more significant) bytes in the packet. To avoid classifier + * software from having to byte swap the CSUM_SEED, the iDMA checksum + * engine byte swaps the classifier's result before seeding the checksum + * calculation. Thus, the CSUM_START byte of packet data is added to + * bits[15:8] of the CSUM_SEED field generated by the classifier. This + * byte swap will be visible to Tile software if the CS bit is clear. + */ + uint_reg_t csum_seed_val : 16; + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. 
+ */ + uint_reg_t custom0 : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom0 : 32; + uint_reg_t csum_seed_val : 16; + uint_reg_t csum_start : 8; + uint_reg_t __reserved_3 : 3; + uint_reg_t ctr1 : 5; +#endif + + /* Word 2 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom1 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom1 : 64; +#endif + + /* Word 3 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom2 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom2 : 64; +#endif + + /* Word 4 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by the classification program. Not interpreted by mPIPE + * hardware. + */ + uint_reg_t custom3 : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t custom3 : 64; +#endif + + /* Word 5 */ + +#ifndef __BIG_ENDIAN__ + /** + * Sequence number applied when packet is distributed. Classifier + * selects which sequence number is to be applied by writing the 13-bit + * SQN-selector into this field. For devices that support EXT_SQN (as + * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to + * 32-bits via the IDMA_CTL.EXT_SQN register. In this case the + * PACKET_SQN will be reduced to 32 bits. + */ + uint_reg_t gp_sqn : 16; + /** + * Written by notification hardware. The packet sequence number is + * incremented for each packet that wasn't dropped. + */ + uint_reg_t packet_sqn : 48; +#else /* __BIG_ENDIAN__ */ + uint_reg_t packet_sqn : 48; + uint_reg_t gp_sqn : 16; +#endif + + /* Word 6 */ + +#ifndef __BIG_ENDIAN__ + /** + * Written by hardware when the start-of-packet is received by the mPIPE + * from the MAC. This is the nanoseconds part of the packet timestamp. + */ + uint_reg_t time_stamp_ns : 32; + /** + * Written by hardware when the start-of-packet is received by the mPIPE + * from the MAC. This is the seconds part of the packet timestamp. 
+ */ + uint_reg_t time_stamp_sec : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t time_stamp_sec : 32; + uint_reg_t time_stamp_ns : 32; +#endif + + /* Word 7 */ + +#ifndef __BIG_ENDIAN__ + /** Virtual address. Must be sign extended by consumer. */ + int_reg_t va : 42; + /** Reserved. */ + uint_reg_t __reserved_4 : 6; + /** Index of the buffer stack to which this buffer belongs. */ + uint_reg_t stack_idx : 5; + /** Reserved. */ + uint_reg_t __reserved_5 : 3; + /** + * Instance ID. For devices that support automatic buffer return between + * mPIPE instances, this field indicates the buffer owner. If the INST + * field does not match the mPIPE's instance number when a packet is + * egressed, buffers with HWB set will be returned to the other mPIPE + * instance. Note that not all devices support multi-mPIPE buffer + * return. The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates + * whether the INST field in the buffer descriptor is populated by iDMA + * hardware. + */ + uint_reg_t inst : 2; + /** + * Always set to one by hardware in iDMA packet descriptors. For eDMA, + * indicates whether the buffer will be released to the buffer stack + * manager. When 0, software is responsible for releasing the buffer. + */ + uint_reg_t hwb : 1; + /** + * Encoded size of buffer. Set by the ingress hardware for iDMA packet + * descriptors. For eDMA descriptors, indicates the buffer size if .c + * indicates a chained packet. If an eDMA descriptor is not chained and + * the .hwb bit is not set, this field is ignored and the size is + * specified by the .xfer_size field. + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t size : 3; + /** + * Chaining configuration for the buffer. Indicates that an ingress + * packet or egress command is chained across multiple buffers, with each + * buffer's size indicated by the .size field. 
+ */ + uint_reg_t c : 2; +#else /* __BIG_ENDIAN__ */ + uint_reg_t c : 2; + uint_reg_t size : 3; + uint_reg_t hwb : 1; + uint_reg_t inst : 2; + uint_reg_t __reserved_5 : 3; + uint_reg_t stack_idx : 5; + uint_reg_t __reserved_4 : 6; + int_reg_t va : 42; +#endif + + }; + + /** Word access */ + uint_reg_t words[8]; +} MPIPE_PDESC_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_MPIPE_SHM_H__) */ diff --git a/arch/tile/include/arch/mpipe_shm_def.h b/arch/tile/include/arch/mpipe_shm_def.h new file mode 100644 index 00000000000..6124d39c831 --- /dev/null +++ b/arch/tile/include/arch/mpipe_shm_def.h @@ -0,0 +1,23 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_MPIPE_SHM_DEF_H__ +#define __ARCH_MPIPE_SHM_DEF_H__ +#define MPIPE_EDMA_DESC_WORD1__C_VAL_UNCHAINED 0x0 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_CHAINED 0x1 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY 0x2 +#define MPIPE_EDMA_DESC_WORD1__C_VAL_INVALID 0x3 +#endif /* !defined(__ARCH_MPIPE_SHM_DEF_H__) */ diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index 442fcba0d12..2de83e7aff3 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -11,6 +11,11 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. 
*/ +#ifndef __ARCH_SPR_DEF_H__ +#define __ARCH_SPR_DEF_H__ + +#include <uapi/arch/spr_def.h> + /* * In addition to including the proper base SPR definition file, depending @@ -29,7 +34,6 @@ #define _concat4(a, b, c, d) __concat4(a, b, c, d) #ifdef __tilegx__ -#include <arch/spr_def_64.h> /* TILE-Gx dependent, protection-level dependent SPRs. */ @@ -52,8 +56,8 @@ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) #define SPR_IPI_EVENT_RESET_K \ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) -#define SPR_IPI_MASK_SET_K \ - _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) #define INT_IPI_K \ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) @@ -65,7 +69,6 @@ _concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,) #else -#include <arch/spr_def_32.h> /* TILEPro dependent, protection-level dependent SPRs. */ @@ -102,3 +105,5 @@ _concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,) #define INT_INTCTRL_K \ _concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,) + +#endif /* __ARCH_SPR_DEF_H__ */ diff --git a/arch/tile/include/arch/trio.h b/arch/tile/include/arch/trio.h new file mode 100644 index 00000000000..c0ddedcae08 --- /dev/null +++ b/arch/tile/include/arch/trio.h @@ -0,0 +1,111 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_TRIO_H__ +#define __ARCH_TRIO_H__ + +#include <arch/abi.h> +#include <arch/trio_def.h> + +#ifndef __ASSEMBLER__ + +/* + * Map SQ Doorbell Format. + * This describes the format of the write-only doorbell register that exists + * in the last 8-bytes of the MAP_SQ_BASE/LIM range. This register is only + * writable from PCIe space. Writes to this register will not be written to + * Tile memory space and thus no IO VA translation is required if the last + * page of the BASE/LIM range is not otherwise written. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * When written with a 1, the associated MAP_SQ region's doorbell + * interrupt will be triggered once all previous writes are visible to + * Tile software. + */ + uint_reg_t doorbell : 1; + /* + * When written with a 1, the descriptor at the head of the associated + * MAP_SQ's FIFO will be dequeued. + */ + uint_reg_t pop : 1; + /* Reserved. */ + uint_reg_t __reserved : 62; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 62; + uint_reg_t pop : 1; + uint_reg_t doorbell : 1; +#endif + }; + + uint_reg_t word; +} TRIO_MAP_SQ_DOORBELL_FMT_t; + + +/* + * Tile PIO Region Configuration - CFG Address Format. + * This register describes the address format for PIO accesses when the + * associated region is setup with TYPE=CFG. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Register Address (full byte address). */ + uint_reg_t reg_addr : 12; + /* Function Number */ + uint_reg_t fn : 3; + /* Device Number */ + uint_reg_t dev : 5; + /* BUS Number */ + uint_reg_t bus : 8; + /* Config Type: 0 for access to directly-attached device. 1 otherwise. */ + uint_reg_t type : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* + * MAC select. This must match the configuration in + * TILE_PIO_REGION_SETUP.MAC. + */ + uint_reg_t mac : 2; + /* Reserved. 
*/ + uint_reg_t __reserved_1 : 32; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 32; + uint_reg_t mac : 2; + uint_reg_t __reserved_0 : 1; + uint_reg_t type : 1; + uint_reg_t bus : 8; + uint_reg_t dev : 5; + uint_reg_t fn : 3; + uint_reg_t reg_addr : 12; +#endif + }; + + uint_reg_t word; +} TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_H__) */ diff --git a/arch/tile/include/arch/trio_constants.h b/arch/tile/include/arch/trio_constants.h new file mode 100644 index 00000000000..85647e91a45 --- /dev/null +++ b/arch/tile/include/arch/trio_constants.h @@ -0,0 +1,36 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + + +#ifndef __ARCH_TRIO_CONSTANTS_H__ +#define __ARCH_TRIO_CONSTANTS_H__ + +#define TRIO_NUM_ASIDS 32 +#define TRIO_NUM_TLBS_PER_ASID 16 + +#define TRIO_NUM_TPIO_REGIONS 8 +#define TRIO_LOG2_NUM_TPIO_REGIONS 3 + +#define TRIO_NUM_MAP_MEM_REGIONS 32 +#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5 +#define TRIO_NUM_MAP_SQ_REGIONS 8 +#define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3 + +#define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6 + +#define TRIO_NUM_PUSH_DMA_RINGS 64 + +#define TRIO_NUM_PULL_DMA_RINGS 64 + +#endif /* __ARCH_TRIO_CONSTANTS_H__ */ diff --git a/arch/tile/include/arch/trio_def.h b/arch/tile/include/arch/trio_def.h new file mode 100644 index 00000000000..e80500317dc --- /dev/null +++ b/arch/tile/include/arch/trio_def.h @@ -0,0 +1,41 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_DEF_H__ +#define __ARCH_TRIO_DEF_H__ +#define TRIO_CFG_REGION_ADDR__REG_SHIFT 0 +#define TRIO_CFG_REGION_ADDR__INTFC_SHIFT 16 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_TRIO 0x0 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE 0x1 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD 0x2 +#define TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED 0x3 +#define TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT 18 +#define TRIO_CFG_REGION_ADDR__PROT_SHIFT 20 +#define TRIO_PIO_REGIONS_ADDR__REGION_SHIFT 32 +#define TRIO_MAP_MEM_REG_INT0 0x1000000000 +#define TRIO_MAP_MEM_REG_INT1 0x1000000008 +#define TRIO_MAP_MEM_REG_INT2 0x1000000010 +#define TRIO_MAP_MEM_REG_INT3 0x1000000018 +#define TRIO_MAP_MEM_REG_INT4 0x1000000020 +#define TRIO_MAP_MEM_REG_INT5 0x1000000028 +#define TRIO_MAP_MEM_REG_INT6 0x1000000030 +#define TRIO_MAP_MEM_REG_INT7 0x1000000038 +#define TRIO_MAP_MEM_LIM__ADDR_SHIFT 12 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED 0x0 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT 0x1 +#define TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD 0x2 +#define TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT 30 +#endif /* !defined(__ARCH_TRIO_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_intfc.h b/arch/tile/include/arch/trio_pcie_intfc.h new file mode 100644 index 00000000000..0487fdb9d58 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_intfc.h @@ -0,0 +1,229 @@ +/* + * Copyright 2012 Tilera Corporation. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_INTFC_H__ +#define __ARCH_TRIO_PCIE_INTFC_H__ + +#include <arch/abi.h> +#include <arch/trio_pcie_intfc_def.h> + +#ifndef __ASSEMBLER__ + +/* + * Port Configuration. + * Configuration of the PCIe Port + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Provides the state of the strapping pins for this port. */ + uint_reg_t strap_state : 3; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* + * When 1, the device type will be overridden using OVD_DEV_TYPE_VAL. + * When 0, the device type is determined based on the STRAP_STATE. + */ + uint_reg_t ovd_dev_type : 1; + /* Provides the device type when OVD_DEV_TYPE is 1. */ + uint_reg_t ovd_dev_type_val : 4; + /* Determines how link is trained. */ + uint_reg_t train_mode : 2; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* + * For PCIe, used to flip physical RX lanes that were not properly wired. + * This is not the same as lane reversal which is handled automatically + * during link training. When 0, RX Lane0 must be wired to the link + * partner (either to its Lane0 or its LaneN). When RX_LANE_FLIP is 1, + * the highest numbered lane for this port becomes Lane0 and Lane0 does + * NOT have to be wired to the link partner. + */ + uint_reg_t rx_lane_flip : 1; + /* + * For PCIe, used to flip physical TX lanes that were not properly wired.
+ * This is not the same as lane reversal which is handled automatically + * during link training. When 0, TX Lane0 must be wired to the link + * partner (either to its Lane0 or its LaneN). When TX_LANE_FLIP is 1, + * the highest numbered lane for this port becomes Lane0 and Lane0 does + * NOT have to be wired to the link partner. + */ + uint_reg_t tx_lane_flip : 1; + /* + * For StreamIO port, configures the width of the port when TRAIN_MODE is + * not STRAP. + */ + uint_reg_t stream_width : 2; + /* + * For StreamIO port, configures the rate of the port when TRAIN_MODE is + * not STRAP. + */ + uint_reg_t stream_rate : 2; + /* Reserved. */ + uint_reg_t __reserved_2 : 46; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 46; + uint_reg_t stream_rate : 2; + uint_reg_t stream_width : 2; + uint_reg_t tx_lane_flip : 1; + uint_reg_t rx_lane_flip : 1; + uint_reg_t __reserved_1 : 1; + uint_reg_t train_mode : 2; + uint_reg_t ovd_dev_type_val : 4; + uint_reg_t ovd_dev_type : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t strap_state : 3; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_PORT_CONFIG_t; + +/* + * Port Status. + * Status of the PCIe Port. This register applies to the StreamIO port when + * StreamIO is enabled. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Indicates the DL state of the port. When 1, the port is up and ready + * to receive traffic. + */ + uint_reg_t dl_up : 1; + /* + * Indicates the number of times the link has gone down. Clears on read. + */ + uint_reg_t dl_down_cnt : 7; + /* Indicates the SERDES PLL has spun up and is providing a valid clock. */ + uint_reg_t clock_ready : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* Device revision ID. */ + uint_reg_t device_rev : 8; + /* Link state (PCIe). */ + uint_reg_t ltssm_state : 6; + /* Link power management state (PCIe). */ + uint_reg_t pm_state : 3; + /* Reserved.
*/ + uint_reg_t __reserved_1 : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 31; + uint_reg_t pm_state : 3; + uint_reg_t ltssm_state : 6; + uint_reg_t device_rev : 8; + uint_reg_t __reserved_0 : 7; + uint_reg_t clock_ready : 1; + uint_reg_t dl_down_cnt : 7; + uint_reg_t dl_up : 1; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_PORT_STATUS_t; + +/* + * Transmit FIFO Control. + * Contains TX FIFO thresholds. These registers are for diagnostics purposes + * only. Changing these values causes undefined behavior. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Almost-Empty level for TX0 data. Typically set to at least + * roundup(38.0*M/N) where N=tclk frequency and M=MAC symbol rate in MHz + * for a x4 port (250MHz). + */ + uint_reg_t tx0_data_ae_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* Almost-Empty level for TX1 data. */ + uint_reg_t tx1_data_ae_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* Almost-Full level for TX0 data. */ + uint_reg_t tx0_data_af_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_2 : 1; + /* Almost-Full level for TX1 data. */ + uint_reg_t tx1_data_af_lvl : 7; + /* Reserved. */ + uint_reg_t __reserved_3 : 1; + /* Almost-Full level for TX0 info. */ + uint_reg_t tx0_info_af_lvl : 5; + /* Reserved. */ + uint_reg_t __reserved_4 : 3; + /* Almost-Full level for TX1 info. */ + uint_reg_t tx1_info_af_lvl : 5; + /* Reserved. */ + uint_reg_t __reserved_5 : 3; + /* + * This register provides performance adjustment for high bandwidth + * flows. The MAC will assert almost-full to TRIO if non-posted credits + * fall below this level. Note that setting this larger than the initial + * PORT_CREDIT.NPH value will cause READS to never be sent. If the + * initial credit value from the link partner is smaller than this value + * when the link comes up, the value will be reset to the initial credit + * value to prevent lockup. 
+ */ + uint_reg_t min_np_credits : 8; + /* + * This register provides performance adjustment for high bandwidth + * flows. The MAC will assert almost-full to TRIO if posted credits fall + * below this level. Note that setting this larger than the initial + * PORT_CREDIT.PH value will cause WRITES to never be sent. If the + * initial credit value from the link partner is smaller than this value + * when the link comes up, the value will be reset to the initial credit + * value to prevent lockup. + */ + uint_reg_t min_p_credits : 8; +#else /* __BIG_ENDIAN__ */ + uint_reg_t min_p_credits : 8; + uint_reg_t min_np_credits : 8; + uint_reg_t __reserved_5 : 3; + uint_reg_t tx1_info_af_lvl : 5; + uint_reg_t __reserved_4 : 3; + uint_reg_t tx0_info_af_lvl : 5; + uint_reg_t __reserved_3 : 1; + uint_reg_t tx1_data_af_lvl : 7; + uint_reg_t __reserved_2 : 1; + uint_reg_t tx0_data_af_lvl : 7; + uint_reg_t __reserved_1 : 1; + uint_reg_t tx1_data_ae_lvl : 7; + uint_reg_t __reserved_0 : 1; + uint_reg_t tx0_data_ae_lvl : 7; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_INTFC_TX_FIFO_CTL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_intfc_def.h b/arch/tile/include/arch/trio_pcie_intfc_def.h new file mode 100644 index 00000000000..d3fd6781fb2 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_intfc_def.h @@ -0,0 +1,32 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_PCIE_INTFC_DEF_H__ +#define __ARCH_TRIO_PCIE_INTFC_DEF_H__ +#define TRIO_PCIE_INTFC_MAC_INT_STS 0x0000 +#define TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK 0xf000 +#define TRIO_PCIE_INTFC_PORT_CONFIG 0x0018 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_DISABLED 0x0 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT 0x1 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC 0x2 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 0x3 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1 0x4 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_XLINK 0x5 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X1 0x6 +#define TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_STREAM_X4 0x7 +#define TRIO_PCIE_INTFC_PORT_STATUS 0x0020 +#define TRIO_PCIE_INTFC_TX_FIFO_CTL 0x0050 +#endif /* !defined(__ARCH_TRIO_PCIE_INTFC_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_rc.h b/arch/tile/include/arch/trio_pcie_rc.h new file mode 100644 index 00000000000..6a25d0aca85 --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_rc.h @@ -0,0 +1,156 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_TRIO_PCIE_RC_H__ +#define __ARCH_TRIO_PCIE_RC_H__ + +#include <arch/abi.h> +#include <arch/trio_pcie_rc_def.h> + +#ifndef __ASSEMBLER__ + +/* Device Capabilities Register. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Max_Payload_Size Supported, writable through the MAC_STANDARD interface + */ + uint_reg_t mps_sup : 3; + /* + * This field is writable through the MAC_STANDARD interface. However, + * Phantom Function is not supported. Therefore, the application must + * not write any value other than 0x0 to this field. + */ + uint_reg_t phantom_function_supported : 2; + /* This bit is writable through the MAC_STANDARD interface. */ + uint_reg_t ext_tag_field_supported : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 3; + /* Endpoint L1 Acceptable Latency. Must be 0x0 for non-Endpoint devices. */ + uint_reg_t l1_lat : 3; + /* + * Undefined since PCI Express 1.1 (Was Attention Button Present for PCI + * Express 1.0a) + */ + uint_reg_t r1 : 1; + /* + * Undefined since PCI Express 1.1 (Was Attention Indicator Present for + * PCI Express 1.0a) + */ + uint_reg_t r2 : 1; + /* + * Undefined since PCI Express 1.1 (Was Power Indicator Present for PCI + * Express 1.0a) + */ + uint_reg_t r3 : 1; + /* + * Role-Based Error Reporting, writable through the MAC_STANDARD + * interface. Required to be set for device compliant to 1.1 spec and + * later. + */ + uint_reg_t rer : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 2; + /* Captured Slot Power Limit Value. Upstream port only. */ + uint_reg_t slot_pwr_lim : 8; + /* Captured Slot Power Limit Scale. Upstream port only. */ + uint_reg_t slot_pwr_scale : 2; + /* Reserved. */ + uint_reg_t __reserved_2 : 4; + /* Endpoint L0s Acceptable Latency. Must be 0x0 for non-Endpoint devices. */ + uint_reg_t l0s_lat : 1; + /* Reserved.
*/ + uint_reg_t __reserved_3 : 31; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_3 : 31; + uint_reg_t l0s_lat : 1; + uint_reg_t __reserved_2 : 4; + uint_reg_t slot_pwr_scale : 2; + uint_reg_t slot_pwr_lim : 8; + uint_reg_t __reserved_1 : 2; + uint_reg_t rer : 1; + uint_reg_t r3 : 1; + uint_reg_t r2 : 1; + uint_reg_t r1 : 1; + uint_reg_t l1_lat : 3; + uint_reg_t __reserved_0 : 3; + uint_reg_t ext_tag_field_supported : 1; + uint_reg_t phantom_function_supported : 2; + uint_reg_t mps_sup : 3; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_RC_DEVICE_CAP_t; + +/* Device Control Register. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Correctable Error Reporting Enable */ + uint_reg_t cor_err_ena : 1; + /* Non-Fatal Error Reporting Enable */ + uint_reg_t nf_err_ena : 1; + /* Fatal Error Reporting Enable */ + uint_reg_t fatal_err_ena : 1; + /* Unsupported Request Reporting Enable */ + uint_reg_t ur_ena : 1; + /* Relaxed ordering enable */ + uint_reg_t ro_ena : 1; + /* Max Payload Size */ + uint_reg_t max_payload_size : 3; + /* Extended Tag Field Enable */ + uint_reg_t ext_tag : 1; + /* Phantom Function Enable */ + uint_reg_t ph_fn_ena : 1; + /* AUX Power PM Enable */ + uint_reg_t aux_pm_ena : 1; + /* Enable NoSnoop */ + uint_reg_t no_snoop : 1; + /* Max read request size */ + uint_reg_t max_read_req_sz : 3; + /* Reserved.
*/ + uint_reg_t __reserved : 49; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 49; + uint_reg_t max_read_req_sz : 3; + uint_reg_t no_snoop : 1; + uint_reg_t aux_pm_ena : 1; + uint_reg_t ph_fn_ena : 1; + uint_reg_t ext_tag : 1; + uint_reg_t max_payload_size : 3; + uint_reg_t ro_ena : 1; + uint_reg_t ur_ena : 1; + uint_reg_t fatal_err_ena : 1; + uint_reg_t nf_err_ena : 1; + uint_reg_t cor_err_ena : 1; +#endif + }; + + uint_reg_t word; +} TRIO_PCIE_RC_DEVICE_CONTROL_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_PCIE_RC_H__) */ diff --git a/arch/tile/include/arch/trio_pcie_rc_def.h b/arch/tile/include/arch/trio_pcie_rc_def.h new file mode 100644 index 00000000000..74081a65b6f --- /dev/null +++ b/arch/tile/include/arch/trio_pcie_rc_def.h @@ -0,0 +1,24 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_TRIO_PCIE_RC_DEF_H__ +#define __ARCH_TRIO_PCIE_RC_DEF_H__ +#define TRIO_PCIE_RC_DEVICE_CAP 0x0074 +#define TRIO_PCIE_RC_DEVICE_CONTROL 0x0078 +#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID 0x0000 +#define TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT 16 +#define TRIO_PCIE_RC_REVISION_ID 0x0008 +#endif /* !defined(__ARCH_TRIO_PCIE_RC_DEF_H__) */ diff --git a/arch/tile/include/arch/trio_shm.h b/arch/tile/include/arch/trio_shm.h new file mode 100644 index 00000000000..3382e38245a --- /dev/null +++ b/arch/tile/include/arch/trio_shm.h @@ -0,0 +1,125 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + + +#ifndef __ARCH_TRIO_SHM_H__ +#define __ARCH_TRIO_SHM_H__ + +#include <arch/abi.h> +#include <arch/trio_shm_def.h> + +#ifndef __ASSEMBLER__ +/** + * TRIO DMA Descriptor. + * The TRIO DMA descriptor is written by software and consumed by hardware. + * It is used to specify the location of transaction data in the IO and Tile + * domains. + */ + +__extension__ +typedef union +{ + struct + { + /* Word 0 */ + +#ifndef __BIG_ENDIAN__ + /** Tile side virtual address. */ + int_reg_t va : 42; + /** + * Encoded size of buffer used on push DMA when C=1: + * 0 = 128 bytes + * 1 = 256 bytes + * 2 = 512 bytes + * 3 = 1024 bytes + * 4 = 1664 bytes + * 5 = 4096 bytes + * 6 = 10368 bytes + * 7 = 16384 bytes + */ + uint_reg_t bsz : 3; + /** + * Chaining designation. 
Always zero for pull DMA + * 0 : Unchained buffer pointer + * 1 : Chained buffer pointer. Next buffer descriptor (e.g. VA) stored + * in 1st 8-bytes in buffer. For chained buffers, first 8-bytes of each + * buffer contain the next buffer descriptor formatted exactly like a PDE + * buffer descriptor. This allows a chained PDE buffer to be sent using + * push DMA. + */ + uint_reg_t c : 1; + /** + * Notification interrupt will be delivered when the transaction has + * completed (all data has been read from or written to the Tile-side + * buffer). + */ + uint_reg_t notif : 1; + /** + * When 0, the XSIZE field specifies the total byte count for the + * transaction. When 1, the XSIZE field is encoded as 2^(N+14) for N in + * {0..6}: + * 0 = 16KB + * 1 = 32KB + * 2 = 64KB + * 3 = 128KB + * 4 = 256KB + * 5 = 512KB + * 6 = 1MB + * All other encodings of the XSIZE field are reserved when SMOD=1 + */ + uint_reg_t smod : 1; + /** + * Total number of bytes to move for this transaction. When SMOD=1, + * this field is encoded - see SMOD description. + */ + uint_reg_t xsize : 14; + /** Reserved. */ + uint_reg_t __reserved_0 : 1; + /** + * Generation number. Used to indicate a valid descriptor in ring. When + * a new descriptor is written into the ring, software must toggle this + * bit. The net effect is that the GEN bit being written into new + * descriptors toggles each time the ring tail pointer wraps. 
+ */ + uint_reg_t gen : 1; +#else /* __BIG_ENDIAN__ */ + uint_reg_t gen : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t xsize : 14; + uint_reg_t smod : 1; + uint_reg_t notif : 1; + uint_reg_t c : 1; + uint_reg_t bsz : 3; + int_reg_t va : 42; +#endif + + /* Word 1 */ + +#ifndef __BIG_ENDIAN__ + /** IO-side address */ + uint_reg_t io_address : 64; +#else /* __BIG_ENDIAN__ */ + uint_reg_t io_address : 64; +#endif + + }; + + /** Word access */ + uint_reg_t words[2]; +} TRIO_DMA_DESC_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_TRIO_SHM_H__) */ diff --git a/arch/tile/include/arch/trio_shm_def.h b/arch/tile/include/arch/trio_shm_def.h new file mode 100644 index 00000000000..72a59c88b06 --- /dev/null +++ b/arch/tile/include/arch/trio_shm_def.h @@ -0,0 +1,19 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_TRIO_SHM_DEF_H__ +#define __ARCH_TRIO_SHM_DEF_H__ +#endif /* !defined(__ARCH_TRIO_SHM_DEF_H__) */ diff --git a/arch/tile/include/arch/uart.h b/arch/tile/include/arch/uart.h new file mode 100644 index 00000000000..07966970ada --- /dev/null +++ b/arch/tile/include/arch/uart.h @@ -0,0 +1,300 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. */ + +#ifndef __ARCH_UART_H__ +#define __ARCH_UART_H__ + +#include <arch/abi.h> +#include <arch/uart_def.h> + +#ifndef __ASSEMBLER__ + +/* Divisor. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * Baud Rate Divisor. Desired_baud_rate = REF_CLK frequency / (baud * + * 16). + * Note: REF_CLK is always 125 MHz, the default + * divisor = 68, baud rate = 125M/(68*16) = 115200 baud. + */ + uint_reg_t divisor : 12; + /* Reserved. */ + uint_reg_t __reserved : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved : 52; + uint_reg_t divisor : 12; +#endif + }; + + uint_reg_t word; +} UART_DIVISOR_t; + +/* FIFO Count. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* + * n: n active entries in the receive FIFO (max is 2**8). Each entry has + * 8 bits. + * 0: no active entry in the receive FIFO (that is empty). + */ + uint_reg_t rfifo_count : 9; + /* Reserved. */ + uint_reg_t __reserved_0 : 7; + /* + * n: n active entries in the transmit FIFO (max is 2**8). Each entry has + * 8 bits. + * 0: no active entry in the transmit FIFO (that is empty). + */ + uint_reg_t tfifo_count : 9; + /* Reserved. */ + uint_reg_t __reserved_1 : 7; + /* + * n: n active entries in the write FIFO (max is 2**2). Each entry has 8 + * bits. + * 0: no active entry in the write FIFO (that is empty). + */ + uint_reg_t wfifo_count : 3; + /* Reserved. 
*/ + uint_reg_t __reserved_2 : 29; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 29; + uint_reg_t wfifo_count : 3; + uint_reg_t __reserved_1 : 7; + uint_reg_t tfifo_count : 9; + uint_reg_t __reserved_0 : 7; + uint_reg_t rfifo_count : 9; +#endif + }; + + uint_reg_t word; +} UART_FIFO_COUNT_t; + +/* FLAG. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* 1: receive FIFO is empty */ + uint_reg_t rfifo_empty : 1; + /* 1: write FIFO is empty. */ + uint_reg_t wfifo_empty : 1; + /* 1: transmit FIFO is empty. */ + uint_reg_t tfifo_empty : 1; + /* 1: receive FIFO is full. */ + uint_reg_t rfifo_full : 1; + /* 1: write FIFO is full. */ + uint_reg_t wfifo_full : 1; + /* 1: transmit FIFO is full. */ + uint_reg_t tfifo_full : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 57; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 57; + uint_reg_t tfifo_full : 1; + uint_reg_t wfifo_full : 1; + uint_reg_t rfifo_full : 1; + uint_reg_t tfifo_empty : 1; + uint_reg_t wfifo_empty : 1; + uint_reg_t rfifo_empty : 1; + uint_reg_t __reserved_0 : 1; +#endif + }; + + uint_reg_t word; +} UART_FLAG_t; + +/* + * Interrupt Vector Mask. + * Each bit in this register corresponds to a specific interrupt. When set, + * the associated interrupt will not be dispatched. 
+ */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Read data FIFO read and no data available */ + uint_reg_t rdat_err : 1; + /* Write FIFO was written but it was full */ + uint_reg_t wdat_err : 1; + /* Stop bit not found when current data was received */ + uint_reg_t frame_err : 1; + /* Parity error was detected when current data was received */ + uint_reg_t parity_err : 1; + /* Data was received but the receive FIFO was full */ + uint_reg_t rfifo_overflow : 1; + /* + * An almost full event is reached when data is to be written to the + * receive FIFO, and the receive FIFO has more than or equal to + * BUFFER_THRESHOLD.RFIFO_AFULL bytes. + */ + uint_reg_t rfifo_afull : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* An entry in the transmit FIFO was popped */ + uint_reg_t tfifo_re : 1; + /* An entry has been pushed into the receive FIFO */ + uint_reg_t rfifo_we : 1; + /* An entry of the write FIFO has been popped */ + uint_reg_t wfifo_re : 1; + /* Rshim read receive FIFO in protocol mode */ + uint_reg_t rfifo_err : 1; + /* + * An almost empty event is reached when data is to be read from the + * transmit FIFO, and the transmit FIFO has less than or equal to + * BUFFER_THRESHOLD.TFIFO_AEMPTY bytes. + */ + uint_reg_t tfifo_aempty : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 52; + uint_reg_t tfifo_aempty : 1; + uint_reg_t rfifo_err : 1; + uint_reg_t wfifo_re : 1; + uint_reg_t rfifo_we : 1; + uint_reg_t tfifo_re : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t rfifo_afull : 1; + uint_reg_t rfifo_overflow : 1; + uint_reg_t parity_err : 1; + uint_reg_t frame_err : 1; + uint_reg_t wdat_err : 1; + uint_reg_t rdat_err : 1; +#endif + }; + + uint_reg_t word; +} UART_INTERRUPT_MASK_t; + +/* + * Interrupt vector, write-one-to-clear. + * Each bit in this register corresponds to a specific interrupt. Hardware + * sets the bit when the associated condition has occurred. 
Writing a 1 + * clears the status bit. + */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Read data FIFO read and no data available */ + uint_reg_t rdat_err : 1; + /* Write FIFO was written but it was full */ + uint_reg_t wdat_err : 1; + /* Stop bit not found when current data was received */ + uint_reg_t frame_err : 1; + /* Parity error was detected when current data was received */ + uint_reg_t parity_err : 1; + /* Data was received but the receive FIFO was full */ + uint_reg_t rfifo_overflow : 1; + /* + * Data was received and the receive FIFO is now almost full (more than + * BUFFER_THRESHOLD.RFIFO_AFULL bytes in it) + */ + uint_reg_t rfifo_afull : 1; + /* Reserved. */ + uint_reg_t __reserved_0 : 1; + /* An entry in the transmit FIFO was popped */ + uint_reg_t tfifo_re : 1; + /* An entry has been pushed into the receive FIFO */ + uint_reg_t rfifo_we : 1; + /* An entry of the write FIFO has been popped */ + uint_reg_t wfifo_re : 1; + /* Rshim read receive FIFO in protocol mode */ + uint_reg_t rfifo_err : 1; + /* + * Data was read from the transmit FIFO and now it is almost empty (less + * than or equal to BUFFER_THRESHOLD.TFIFO_AEMPTY bytes in it). + */ + uint_reg_t tfifo_aempty : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 52; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_1 : 52; + uint_reg_t tfifo_aempty : 1; + uint_reg_t rfifo_err : 1; + uint_reg_t wfifo_re : 1; + uint_reg_t rfifo_we : 1; + uint_reg_t tfifo_re : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t rfifo_afull : 1; + uint_reg_t rfifo_overflow : 1; + uint_reg_t parity_err : 1; + uint_reg_t frame_err : 1; + uint_reg_t wdat_err : 1; + uint_reg_t rdat_err : 1; +#endif + }; + + uint_reg_t word; +} UART_INTERRUPT_STATUS_t; + +/* Type. */ + +__extension__ +typedef union +{ + struct + { +#ifndef __BIG_ENDIAN__ + /* Number of stop bits, rx and tx */ + uint_reg_t sbits : 1; + /* Reserved. 
*/ + uint_reg_t __reserved_0 : 1; + /* Data word size, rx and tx */ + uint_reg_t dbits : 1; + /* Reserved. */ + uint_reg_t __reserved_1 : 1; + /* Parity selection, rx and tx */ + uint_reg_t ptype : 3; + /* Reserved. */ + uint_reg_t __reserved_2 : 57; +#else /* __BIG_ENDIAN__ */ + uint_reg_t __reserved_2 : 57; + uint_reg_t ptype : 3; + uint_reg_t __reserved_1 : 1; + uint_reg_t dbits : 1; + uint_reg_t __reserved_0 : 1; + uint_reg_t sbits : 1; +#endif + }; + + uint_reg_t word; +} UART_TYPE_t; +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_UART_H__) */ diff --git a/arch/tile/include/arch/uart_def.h b/arch/tile/include/arch/uart_def.h new file mode 100644 index 00000000000..42bcaf53537 --- /dev/null +++ b/arch/tile/include/arch/uart_def.h @@ -0,0 +1,120 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_UART_DEF_H__ +#define __ARCH_UART_DEF_H__ +#define UART_DIVISOR 0x0158 +#define UART_FIFO_COUNT 0x0110 +#define UART_FLAG 0x0108 +#define UART_INTERRUPT_MASK 0x0208 +#define UART_INTERRUPT_MASK__RDAT_ERR_SHIFT 0 +#define UART_INTERRUPT_MASK__RDAT_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__RDAT_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RDAT_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__RDAT_ERR_MASK 0x1 +#define UART_INTERRUPT_MASK__RDAT_ERR_FIELD 0,0 +#define UART_INTERRUPT_MASK__WDAT_ERR_SHIFT 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__WDAT_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__WDAT_ERR_MASK 0x2 +#define UART_INTERRUPT_MASK__WDAT_ERR_FIELD 1,1 +#define UART_INTERRUPT_MASK__FRAME_ERR_SHIFT 2 +#define UART_INTERRUPT_MASK__FRAME_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__FRAME_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__FRAME_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__FRAME_ERR_MASK 0x4 +#define UART_INTERRUPT_MASK__FRAME_ERR_FIELD 2,2 +#define UART_INTERRUPT_MASK__PARITY_ERR_SHIFT 3 +#define UART_INTERRUPT_MASK__PARITY_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__PARITY_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__PARITY_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__PARITY_ERR_MASK 0x8 +#define UART_INTERRUPT_MASK__PARITY_ERR_FIELD 3,3 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_SHIFT 4 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_MASK 0x10 +#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_FIELD 4,4 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_SHIFT 5 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_AFULL_MASK 0x20 +#define 
UART_INTERRUPT_MASK__RFIFO_AFULL_FIELD 5,5 +#define UART_INTERRUPT_MASK__TFIFO_RE_SHIFT 7 +#define UART_INTERRUPT_MASK__TFIFO_RE_WIDTH 1 +#define UART_INTERRUPT_MASK__TFIFO_RE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__TFIFO_RE_RMASK 0x1 +#define UART_INTERRUPT_MASK__TFIFO_RE_MASK 0x80 +#define UART_INTERRUPT_MASK__TFIFO_RE_FIELD 7,7 +#define UART_INTERRUPT_MASK__RFIFO_WE_SHIFT 8 +#define UART_INTERRUPT_MASK__RFIFO_WE_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_WE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_WE_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_WE_MASK 0x100 +#define UART_INTERRUPT_MASK__RFIFO_WE_FIELD 8,8 +#define UART_INTERRUPT_MASK__WFIFO_RE_SHIFT 9 +#define UART_INTERRUPT_MASK__WFIFO_RE_WIDTH 1 +#define UART_INTERRUPT_MASK__WFIFO_RE_RESET_VAL 1 +#define UART_INTERRUPT_MASK__WFIFO_RE_RMASK 0x1 +#define UART_INTERRUPT_MASK__WFIFO_RE_MASK 0x200 +#define UART_INTERRUPT_MASK__WFIFO_RE_FIELD 9,9 +#define UART_INTERRUPT_MASK__RFIFO_ERR_SHIFT 10 +#define UART_INTERRUPT_MASK__RFIFO_ERR_WIDTH 1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_RESET_VAL 1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_RMASK 0x1 +#define UART_INTERRUPT_MASK__RFIFO_ERR_MASK 0x400 +#define UART_INTERRUPT_MASK__RFIFO_ERR_FIELD 10,10 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_SHIFT 11 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_WIDTH 1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RESET_VAL 1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RMASK 0x1 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_MASK 0x800 +#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_FIELD 11,11 +#define UART_INTERRUPT_STATUS 0x0200 +#define UART_RECEIVE_DATA 0x0148 +#define UART_TRANSMIT_DATA 0x0140 +#define UART_TYPE 0x0160 +#define UART_TYPE__SBITS_SHIFT 0 +#define UART_TYPE__SBITS_WIDTH 1 +#define UART_TYPE__SBITS_RESET_VAL 1 +#define UART_TYPE__SBITS_RMASK 0x1 +#define UART_TYPE__SBITS_MASK 0x1 +#define UART_TYPE__SBITS_FIELD 0,0 +#define UART_TYPE__SBITS_VAL_ONE_SBITS 0x0 +#define UART_TYPE__SBITS_VAL_TWO_SBITS 0x1 +#define 
UART_TYPE__DBITS_SHIFT 2 +#define UART_TYPE__DBITS_WIDTH 1 +#define UART_TYPE__DBITS_RESET_VAL 0 +#define UART_TYPE__DBITS_RMASK 0x1 +#define UART_TYPE__DBITS_MASK 0x4 +#define UART_TYPE__DBITS_FIELD 2,2 +#define UART_TYPE__DBITS_VAL_EIGHT_DBITS 0x0 +#define UART_TYPE__DBITS_VAL_SEVEN_DBITS 0x1 +#define UART_TYPE__PTYPE_SHIFT 4 +#define UART_TYPE__PTYPE_WIDTH 3 +#define UART_TYPE__PTYPE_RESET_VAL 3 +#define UART_TYPE__PTYPE_RMASK 0x7 +#define UART_TYPE__PTYPE_MASK 0x70 +#define UART_TYPE__PTYPE_FIELD 4,6 +#define UART_TYPE__PTYPE_VAL_NONE 0x0 +#define UART_TYPE__PTYPE_VAL_MARK 0x1 +#define UART_TYPE__PTYPE_VAL_SPACE 0x2 +#define UART_TYPE__PTYPE_VAL_EVEN 0x3 +#define UART_TYPE__PTYPE_VAL_ODD 0x4 +#endif /* !defined(__ARCH_UART_DEF_H__) */ diff --git a/arch/tile/include/arch/usb_host.h b/arch/tile/include/arch/usb_host.h new file mode 100644 index 00000000000..d09f3268396 --- /dev/null +++ b/arch/tile/include/arch/usb_host.h @@ -0,0 +1,26 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_USB_HOST_H__ +#define __ARCH_USB_HOST_H__ + +#include <arch/abi.h> +#include <arch/usb_host_def.h> + +#ifndef __ASSEMBLER__ +#endif /* !defined(__ASSEMBLER__) */ + +#endif /* !defined(__ARCH_USB_HOST_H__) */ diff --git a/arch/tile/include/arch/usb_host_def.h b/arch/tile/include/arch/usb_host_def.h new file mode 100644 index 00000000000..aeed7753e8e --- /dev/null +++ b/arch/tile/include/arch/usb_host_def.h @@ -0,0 +1,19 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* Machine-generated file; do not edit. 
*/ + +#ifndef __ARCH_USB_HOST_DEF_H__ +#define __ARCH_USB_HOST_DEF_H__ +#endif /* !defined(__ARCH_USB_HOST_DEF_H__) */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 849ab2fa1f5..0aa5675e702 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -1,4 +1,43 @@ -include include/asm-generic/Kbuild.asm -header-y += ucontext.h -header-y += hardwall.h +header-y += ../arch/ + +generic-y += bug.h +generic-y += bugs.h +generic-y += clkdev.h +generic-y += cputime.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += exec.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += hash.h +generic-y += hw_irq.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += local.h +generic-y += local64.h +generic-y += mcs_spinlock.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += preempt.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += trace_clock.h +generic-y += types.h +generic-y += xor.h diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 75a16028a95..70979846076 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -17,10 +17,12 @@ #ifndef _ASM_TILE_ATOMIC_H #define _ASM_TILE_ATOMIC_H +#include <asm/cmpxchg.h> + #ifndef __ASSEMBLY__ #include <linux/compiler.h> -#include <asm/system.h> +#include <linux/types.h> #define ATOMIC_INIT(i) { (i) } @@ -112,36 +114,40 @@ static inline int atomic_read(const atomic_t *v) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) /** - * atomic_add_negative - add and test if negative + * atomic_xchg - 
atomically exchange contents of memory with a new value * @v: pointer of type atomic_t - * @i: integer value to add + * @i: integer value to store in memory * - * Atomically adds @i to @v and returns true if the result is - * negative, or false when result is greater than or equal to zero. + * Atomically sets @v to @i and returns old @v */ -#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) +static inline int atomic_xchg(atomic_t *v, int n) +{ + return xchg(&v->counter, n); +} /** - * atomic_inc_not_zero - increment unless the number is zero + * atomic_cmpxchg - atomically exchange contents of memory if it matches * @v: pointer of type atomic_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. */ -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) - +static inline int atomic_cmpxchg(atomic_t *v, int o, int n) +{ + return cmpxchg(&v->counter, o, n); +} -/* - * We define xchg() and cmpxchg() in the included headers. - * Note that we do not define __HAVE_ARCH_CMPXCHG, since that would imply - * that cmpxchg() is an efficient operation, which is not particularly true. +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true if the result is + * negative, or false when result is greater than or equal to zero. */ - -/* Nonexistent functions intended to cause link errors. 
*/ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); - -#define tas(ptr) (xchg((ptr), 1)) +#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) #endif /* __ASSEMBLY__ */ @@ -151,9 +157,52 @@ extern unsigned long __cmpxchg_called_with_bad_pointer(void); #include <asm/atomic_64.h> #endif -/* Provide the appropriate atomic_long_t definitions. */ #ifndef __ASSEMBLY__ -#include <asm-generic/atomic-long.h> -#endif + +/** + * atomic64_xchg - atomically exchange contents of memory with a new value + * @v: pointer of type atomic64_t + * @i: integer value to store in memory + * + * Atomically sets @v to @i and returns old @v + */ +static inline long long atomic64_xchg(atomic64_t *v, long long n) +{ + return xchg64(&v->counter, n); +} + +/** + * atomic64_cmpxchg - atomically exchange contents of memory if it matches + * @v: pointer of type atomic64_t + * @o: old value that memory should have + * @n: new value to write to memory if it matches + * + * Atomically checks if @v holds @o and replaces it with @n if so. + * Returns the old value at @v. + */ +static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) +{ + return cmpxchg64(&v->counter, o, n); +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long c, old, dec; + + c = atomic64_read(v); + for (;;) { + dec = c - 1; + if (unlikely(dec < 0)) + break; + old = atomic64_cmpxchg((v), c, dec); + if (likely(old == c)) + break; + c = old; + } + return dec; +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_TILE_ATOMIC_H */ diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index ed359aee883..1b109fad9ff 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -11,50 +11,17 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. * - * Do not include directly; use <asm/atomic.h>. 
+ * Do not include directly; use <linux/atomic.h>. */ #ifndef _ASM_TILE_ATOMIC_32_H #define _ASM_TILE_ATOMIC_32_H +#include <asm/barrier.h> #include <arch/chip.h> #ifndef __ASSEMBLY__ -/* Tile-specific routines to support <asm/atomic.h>. */ -int _atomic_xchg(atomic_t *v, int n); -int _atomic_xchg_add(atomic_t *v, int i); -int _atomic_xchg_add_unless(atomic_t *v, int a, int u); -int _atomic_cmpxchg(atomic_t *v, int o, int n); - -/** - * atomic_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline int atomic_xchg(atomic_t *v, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg(v, n); -} - -/** - * atomic_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline int atomic_cmpxchg(atomic_t *v, int o, int n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic_cmpxchg(v, o, n); -} - /** * atomic_add - add integer to atomic variable * @i: integer value to add @@ -64,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n) */ static inline void atomic_add(int i, atomic_t *v) { - _atomic_xchg_add(v, i); + _atomic_xchg_add(&v->counter, i); } /** @@ -77,22 +44,22 @@ static inline void atomic_add(int i, atomic_t *v) static inline int atomic_add_return(int i, atomic_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add(v, i) + i; + return _atomic_xchg_add(&v->counter, i) + i; } /** - * atomic_add_unless - add unless the number is already a given value + * __atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... 
* @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. - * Returns non-zero if @v was not @u, and zero otherwise. + * Returns the old value of @v. */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) +static inline int __atomic_add_unless(atomic_t *v, int a, int u) { smp_mb(); /* barrier for proper semantics */ - return _atomic_xchg_add_unless(v, a, u) != u; + return _atomic_xchg_add_unless(&v->counter, a, u); } /** @@ -107,74 +74,31 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) */ static inline void atomic_set(atomic_t *v, int n) { - _atomic_xchg(v, n); + _atomic_xchg(&v->counter, n); } -#define xchg(ptr, x) ((typeof(*(ptr))) \ - ((sizeof(*(ptr)) == sizeof(atomic_t)) ? \ - atomic_xchg((atomic_t *)(ptr), (long)(x)) : \ - __xchg_called_with_bad_pointer())) - -#define cmpxchg(ptr, o, n) ((typeof(*(ptr))) \ - ((sizeof(*(ptr)) == sizeof(atomic_t)) ? \ - atomic_cmpxchg((atomic_t *)(ptr), (long)(o), (long)(n)) : \ - __cmpxchg_called_with_bad_pointer())) - /* A 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + long long counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } -u64 _atomic64_xchg(atomic64_t *v, u64 n); -u64 _atomic64_xchg_add(atomic64_t *v, u64 i); -u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u); -u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n); - /** * atomic64_read - read atomic variable * @v: pointer of type atomic64_t * * Atomically reads the value of @v. */ -static inline u64 atomic64_read(const atomic64_t *v) +static inline long long atomic64_read(const atomic64_t *v) { /* * Requires an atomic op to read both 32-bit parts consistently. * Casting away const is safe since the atomic support routines * do not write to memory if the value has not been modified. 
*/ - return _atomic64_xchg_add((atomic64_t *)v, 0); -} - -/** - * atomic64_xchg - atomically exchange contents of memory with a new value - * @v: pointer of type atomic64_t - * @i: integer value to store in memory - * - * Atomically sets @v to @i and returns old @v - */ -static inline u64 atomic64_xchg(atomic64_t *v, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg(v, n); -} - -/** - * atomic64_cmpxchg - atomically exchange contents of memory if it matches - * @v: pointer of type atomic64_t - * @o: old value that memory should have - * @n: new value to write to memory if it matches - * - * Atomically checks if @v holds @o and replaces it with @n if so. - * Returns the old value at @v. - */ -static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) -{ - smp_mb(); /* barrier for proper semantics */ - return _atomic64_cmpxchg(v, o, n); + return _atomic64_xchg_add((long long *)&v->counter, 0); } /** @@ -184,9 +108,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) * * Atomically adds @i to @v. */ -static inline void atomic64_add(u64 i, atomic64_t *v) +static inline void atomic64_add(long long i, atomic64_t *v) { - _atomic64_xchg_add(v, i); + _atomic64_xchg_add(&v->counter, i); } /** @@ -196,10 +120,10 @@ static inline void atomic64_add(u64 i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline u64 atomic64_add_return(u64 i, atomic64_t *v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add(v, i) + i; + return _atomic64_xchg_add(&v->counter, i) + i; } /** @@ -211,10 +135,11 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns non-zero if @v was not @u, and zero otherwise. 
*/ -static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) +static inline long long atomic64_add_unless(atomic64_t *v, long long a, + long long u) { smp_mb(); /* barrier for proper semantics */ - return _atomic64_xchg_add_unless(v, a, u) != u; + return _atomic64_xchg_add_unless(&v->counter, a, u) != u; } /** @@ -227,9 +152,9 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) * atomic64_set() can't be just a raw store, since it would be lost if it * fell between the load and store of one of the other atomic ops. */ -static inline void atomic64_set(atomic64_t *v, u64 n) +static inline void atomic64_set(atomic64_t *v, long long n) { - _atomic64_xchg(v, n); + _atomic64_xchg(&v->counter, n); } #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) @@ -244,16 +169,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) -/* - * We need to barrier before modifying the word, since the _atomic_xxx() - * routines just tns the lock and then read/modify/write of the word. - * But after the word is updated, the routine issues an "mf" before returning, - * and since it's a function call, we don't even need a compiler barrier. - */ -#define smp_mb__before_atomic_dec() smp_mb() -#define smp_mb__before_atomic_inc() smp_mb() -#define smp_mb__after_atomic_dec() do { } while (0) -#define smp_mb__after_atomic_inc() do { } while (0) #endif /* !__ASSEMBLY__ */ @@ -261,21 +176,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) * Internal definitions only beyond this point. */ -#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \ - (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP)) - -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* Number of entries in atomic_lock_ptr[]. 
*/ -#define ATOMIC_HASH_L1_SHIFT 6 -#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT) - -/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */ -#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2) -#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT) - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * Number of atomic locks in atomic_locks[]. Must be a power of two. * There is no reason for more than PAGE_SIZE / 8 entries, since that @@ -290,8 +190,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n) extern int atomic_locks[]; #endif -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* * All the code that may fault while holding an atomic lock must * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code @@ -312,7 +210,14 @@ void __init_atomic_per_cpu(void); void __atomic_fault_unlock(int *lock_ptr); #endif +/* Return a pointer to the lock for the given address. */ +int *__atomic_hashed_lock(volatile void *v); + /* Private helper routines in lib/atomic_asm_32.S */ +struct __get_user { + unsigned long val; + int err; +}; extern struct __get_user __atomic_cmpxchg(volatile int *p, int *lock, int o, int n); extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); @@ -322,11 +227,16 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p, extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); -extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n); -extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n); -extern u64 __atomic64_xchg_add_unless(volatile u64 *p, - int *lock, u64 o, u64 n); +extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, + long long o, long long n); +extern long long __atomic64_xchg(volatile long long 
*p, int *lock, long long n); +extern long long __atomic64_xchg_add(volatile long long *p, int *lock, + long long n); +extern long long __atomic64_xchg_add_unless(volatile long long *p, + int *lock, long long o, long long n); + +/* Return failure from the atomic wrappers. */ +struct __get_user __atomic_bad_address(int __user *addr); #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h new file mode 100644 index 00000000000..7b11c5fadd4 --- /dev/null +++ b/arch/tile/include/asm/atomic_64.h @@ -0,0 +1,113 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Do not include directly; use <linux/atomic.h>. + */ + +#ifndef _ASM_TILE_ATOMIC_64_H +#define _ASM_TILE_ATOMIC_64_H + +#ifndef __ASSEMBLY__ + +#include <asm/barrier.h> +#include <arch/spr_def.h> + +/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */ + +#define atomic_set(v, i) ((v)->counter = (i)) + +/* + * The smp_mb() operations throughout are to support the fact that + * Linux requires memory barriers before and after the operation, + * on any routine which updates memory and returns a value. 
+ */ + +static inline void atomic_add(int i, atomic_t *v) +{ + __insn_fetchadd4((void *)&v->counter, i); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + int val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_fetchadd4((void *)&v->counter, i) + i; + barrier(); /* the "+ i" above will wait on memory */ + return val; +} + +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + +/* Now the true 64-bit operations. */ + +#define ATOMIC64_INIT(i) { (i) } + +#define atomic64_read(v) ((v)->counter) +#define atomic64_set(v, i) ((v)->counter = (i)) + +static inline void atomic64_add(long i, atomic64_t *v) +{ + __insn_fetchadd((void *)&v->counter, i); +} + +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + int val; + smp_mb(); /* barrier for proper semantics */ + val = __insn_fetchadd((void *)&v->counter, i) + i; + barrier(); /* the "+ i" above will wait on memory */ + return val; +} + +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + +#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_sub(i, v) atomic64_add(-(i), (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_inc(v) atomic64_add(1, (v)) +#define atomic64_dec(v) atomic64_sub(1, (v)) + +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) +#define atomic64_add_negative(i, v) 
(atomic64_add_return((i), (v)) < 0) + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +/* Define this to indicate that cmpxchg is an efficient operation. */ +#define __HAVE_ARCH_CMPXCHG + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_ATOMIC_64_H */ diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h index f18887d8239..bd5399a69ed 100644 --- a/arch/tile/include/asm/backtrace.h +++ b/arch/tile/include/asm/backtrace.h @@ -12,80 +12,41 @@ * more details. */ -#ifndef _TILE_BACKTRACE_H -#define _TILE_BACKTRACE_H - - +#ifndef _ASM_TILE_BACKTRACE_H +#define _ASM_TILE_BACKTRACE_H #include <linux/types.h> -#include <arch/chip.h> - -#if defined(__tile__) -typedef unsigned long VirtualAddress; -#elif CHIP_VA_WIDTH() > 32 -typedef unsigned long long VirtualAddress; -#else -typedef unsigned int VirtualAddress; -#endif - - -/** Reads 'size' bytes from 'address' and writes the data to 'result'. +/* Reads 'size' bytes from 'address' and writes the data to 'result'. * Returns true if successful, else false (e.g. memory not readable). */ typedef bool (*BacktraceMemoryReader)(void *result, - VirtualAddress address, + unsigned long address, unsigned int size, void *extra); typedef struct { - /** Current PC. */ - VirtualAddress pc; + /* Current PC. */ + unsigned long pc; - /** Current stack pointer value. */ - VirtualAddress sp; + /* Current stack pointer value. */ + unsigned long sp; - /** Current frame pointer value (i.e. caller's stack pointer) */ - VirtualAddress fp; + /* Current frame pointer value (i.e. caller's stack pointer) */ + unsigned long fp; - /** Internal use only: caller's PC for first frame. */ - VirtualAddress initial_frame_caller_pc; + /* Internal use only: caller's PC for first frame. */ + unsigned long initial_frame_caller_pc; - /** Internal use only: callback to read memory. */ + /* Internal use only: callback to read memory. 
*/ BacktraceMemoryReader read_memory_func; - /** Internal use only: arbitrary argument to read_memory_func. */ + /* Internal use only: arbitrary argument to read_memory_func. */ void *read_memory_func_extra; } BacktraceIterator; -/** Initializes a backtracer to start from the given location. - * - * If the frame pointer cannot be determined it is set to -1. - * - * @param state The state to be filled in. - * @param read_memory_func A callback that reads memory. If NULL, a default - * value is provided. - * @param read_memory_func_extra An arbitrary argument to read_memory_func. - * @param pc The current PC. - * @param lr The current value of the 'lr' register. - * @param sp The current value of the 'sp' register. - * @param r52 The current value of the 'r52' register. - */ -extern void backtrace_init(BacktraceIterator *state, - BacktraceMemoryReader read_memory_func, - void *read_memory_func_extra, - VirtualAddress pc, VirtualAddress lr, - VirtualAddress sp, VirtualAddress r52); - - -/** Advances the backtracing state to the calling frame, returning - * true iff successful. - */ -extern bool backtrace_next(BacktraceIterator *state); - - typedef enum { /* We have no idea what the caller's pc is. */ @@ -138,7 +99,7 @@ enum { }; -/** Internal constants used to define 'info' operands. */ +/* Internal constants used to define 'info' operands. */ enum { /* 0 and 1 are reserved, as are all negative numbers. */ @@ -147,13 +108,10 @@ enum { CALLER_SP_IN_R52_BASE = 4, CALLER_SP_OFFSET_BASE = 8, - - /* Marks the entry point of certain functions. */ - ENTRY_POINT_INFO_OP = 16 }; -/** Current backtracer state describing where it thinks the caller is. */ +/* Current backtracer state describing where it thinks the caller is. 
*/ typedef struct { /* * Public fields @@ -192,7 +150,13 @@ typedef struct { } CallerLocation; +extern void backtrace_init(BacktraceIterator *state, + BacktraceMemoryReader read_memory_func, + void *read_memory_func_extra, + unsigned long pc, unsigned long lr, + unsigned long sp, unsigned long r52); +extern bool backtrace_next(BacktraceIterator *state); -#endif /* _TILE_BACKTRACE_H */ +#endif /* _ASM_TILE_BACKTRACE_H */ diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h new file mode 100644 index 00000000000..96a42ae79f4 --- /dev/null +++ b/arch/tile/include/asm/barrier.h @@ -0,0 +1,92 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BARRIER_H +#define _ASM_TILE_BARRIER_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <arch/chip.h> +#include <arch/spr_def.h> +#include <asm/timex.h> + +#define __sync() __insn_mf() + +#include <hv/syscall_public.h> +/* + * Issue an uncacheable load to each memory controller, then + * wait until those loads have completed. + */ +static inline void __mb_incoherent(void) +{ + long clobber_r10; + asm volatile("swint2" + : "=R10" (clobber_r10) + : "R10" (HV_SYS_fence_incoherent) + : "r0", "r1", "r2", "r3", "r4", + "r5", "r6", "r7", "r8", "r9", + "r11", "r12", "r13", "r14", + "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", + "r25", "r26", "r27", "r28", "r29"); +} + +/* Fence to guarantee visibility of stores to incoherent memory. 
*/ +static inline void +mb_incoherent(void) +{ + __insn_mf(); + + { +#if CHIP_HAS_TILE_WRITE_PENDING() + const unsigned long WRITE_TIMEOUT_CYCLES = 400; + unsigned long start = get_cycles_low(); + do { + if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0) + return; + } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES); +#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ + (void) __mb_incoherent(); + } +} + +#define fast_wmb() __sync() +#define fast_rmb() __sync() +#define fast_mb() __sync() +#define fast_iob() mb_incoherent() + +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define mb() fast_mb() +#define iob() fast_iob() + +#ifndef __tilegx__ /* 32 bit */ +/* + * We need to barrier before modifying the word, since the _atomic_xxx() + * routines just tns the lock and then read/modify/write of the word. + * But after the word is updated, the routine issues an "mf" before returning, + * and since it's a function call, we don't even need a compiler barrier. + */ +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() do { } while (0) +#else /* 64 bit */ +#define smp_mb__before_atomic() smp_mb() +#define smp_mb__after_atomic() smp_mb() +#endif + +#include <asm-generic/barrier.h> + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_TILE_BARRIER_H */ diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 132e6bbd07e..20caa346ac0 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -17,6 +17,7 @@ #define _ASM_TILE_BITOPS_H #include <linux/types.h> +#include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly @@ -29,17 +30,6 @@ #endif /** - * __ffs - find first set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. 
- */ -static inline unsigned long __ffs(unsigned long word) -{ - return __builtin_ctzl(word); -} - -/** * ffz - find first zero bit in word * @word: The word to search * @@ -50,31 +40,9 @@ static inline unsigned long ffz(unsigned long word) return __builtin_ctzl(~word); } -/** - * __fls - find last set bit in word - * @word: The word to search - * - * Undefined if no set bit exists, so code should check against 0 first. - */ -static inline unsigned long __fls(unsigned long word) -{ - return (sizeof(word) * 8) - 1 - __builtin_clzl(word); -} - -/** - * ffs - find first set bit in word - * @x: the word to search - * - * This is defined the same way as the libc and compiler builtin ffs - * routines, therefore differs in spirit from the other bitops. - * - * ffs(value) returns 0 if value is 0 or the position of the first - * set bit if value is nonzero. The first (least significant) bit - * is at position 1. - */ -static inline int ffs(int x) +static inline int fls64(__u64 w) { - return __builtin_ffs(x); + return (sizeof(__u64) * 8) - __builtin_clzll(w); } /** @@ -90,12 +58,7 @@ static inline int ffs(int x) */ static inline int fls(int x) { - return (sizeof(int) * 8) - __builtin_clz(x); -} - -static inline int fls64(__u64 w) -{ - return (sizeof(__u64) * 8) - __builtin_clzll(w); + return fls64((unsigned int) x); } static inline unsigned int __arch_hweight32(unsigned int w) @@ -118,10 +81,14 @@ static inline unsigned long __arch_hweight64(__u64 w) return __builtin_popcountll(w); } +#include <asm-generic/bitops/builtin-__ffs.h> +#include <asm-generic/bitops/builtin-__fls.h> +#include <asm-generic/bitops/builtin-ffs.h> #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/lock.h> #include <asm-generic/bitops/find.h> #include <asm-generic/bitops/sched.h> +#include <asm-generic/bitops/non-atomic.h> #include <asm-generic/bitops/le.h> #endif /* _ASM_TILE_BITOPS_H */ diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index 
2638be51a16..bbf7b666f21 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -16,8 +16,7 @@ #define _ASM_TILE_BITOPS_32_H #include <linux/compiler.h> -#include <asm/atomic.h> -#include <asm/system.h> +#include <asm/barrier.h> /* Tile-specific routines to support <asm/bitops.h>. */ unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); @@ -50,8 +49,8 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) * restricted to acting on a single-word quantity. * * clear_bit() may not contain a memory barrier, so if it is used for - * locking purposes, you should call smp_mb__before_clear_bit() and/or - * smp_mb__after_clear_bit() to ensure changes are visible on other cpus. + * locking purposes, you should call smp_mb__before_atomic() and/or + * smp_mb__after_atomic() to ensure changes are visible on other cpus. */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { @@ -122,11 +121,6 @@ static inline int test_and_change_bit(unsigned nr, return (_atomic_xor(addr, mask) & mask) != 0; } -/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */ -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() do {} while (0) - -#include <asm-generic/bitops/non-atomic.h> #include <asm-generic/bitops/ext2-atomic.h> #endif /* _ASM_TILE_BITOPS_32_H */ diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h new file mode 100644 index 00000000000..bb1a29221fc --- /dev/null +++ b/arch/tile/include/asm/bitops_64.h @@ -0,0 +1,95 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_BITOPS_64_H +#define _ASM_TILE_BITOPS_64_H + +#include <linux/compiler.h> +#include <asm/cmpxchg.h> + +/* See <asm/bitops.h> for API comments. */ + +static inline void set_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + __insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask); +} + +static inline void clear_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + __insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask); +} + +static inline void change_bit(unsigned nr, volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; + addr += nr / BITS_PER_LONG; + oldval = *addr; + do { + guess = oldval; + oldval = cmpxchg(addr, guess, guess ^ mask); + } while (guess != oldval); +} + + +/* + * The test_and_xxx_bit() routines require a memory fence before we + * start the operation, and after the operation completes. We use + * smp_mb() before, and rely on the "!= 0" comparison, plus a compiler + * barrier(), to block until the atomic op is complete. 
+ */ + +static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) +{ + int val; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + smp_mb(); /* barrier for proper semantics */ + val = (__insn_fetchor((void *)(addr + nr / BITS_PER_LONG), mask) + & mask) != 0; + barrier(); + return val; +} + + +static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) +{ + int val; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + smp_mb(); /* barrier for proper semantics */ + val = (__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask) + & mask) != 0; + barrier(); + return val; +} + + +static inline int test_and_change_bit(unsigned nr, + volatile unsigned long *addr) +{ + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; + addr += nr / BITS_PER_LONG; + oldval = *addr; + do { + guess = oldval; + oldval = cmpxchg(addr, guess, guess ^ mask); + } while (guess != oldval); + return (oldval & mask) != 0; +} + +#include <asm-generic/bitops/ext2-atomic-setbit.h> + +#endif /* _ASM_TILE_BITOPS_64_H */ diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h deleted file mode 100644 index b12fd89e42e..00000000000 --- a/arch/tile/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bug.h> diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h deleted file mode 100644 index 61791e1ad9f..00000000000 --- a/arch/tile/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bugs.h> diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h deleted file mode 100644 index 9558416d578..00000000000 --- a/arch/tile/include/asm/byteorder.h +++ /dev/null @@ -1 +0,0 @@ -#include <linux/byteorder/little_endian.h> diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 392e5333dd8..6160761d5f6 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -27,11 +27,17 @@ #define 
L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES) /* - * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN. + * TILEPro I/O is not always coherent (networking typically uses coherent + * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the + * L2 cacheline size helps ensure that kernel heap allocations are aligned. + * TILE-Gx I/O is always coherent when used on hash-for-home pages. + * + * However, it's possible at runtime to request not to use hash-for-home + * for the kernel heap, in which case the kernel will use flush-and-inval + * to manage coherence. As a result, we use L2_CACHE_BYTES for the + * DMA minimum alignment to avoid false sharing in the kernel heap. */ -#ifndef __tilegx__ #define ARCH_DMA_MINALIGN L2_CACHE_BYTES -#endif /* use the cache line size for the L2, which is where it counts */ #define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT @@ -43,9 +49,16 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) /* - * Attribute for data that is kept read/write coherent until the end of - * initialization, then bumped to read/only incoherent for performance. + * Originally we used small TLB pages for kernel data and grouped some + * things together as "write once", enforcing the property at the end + * of initialization by making those pages read-only and non-coherent. + * This allowed better cache utilization since cache inclusion did not + * need to be maintained. However, to do this requires an extra TLB + * entry, which on balance is more of a performance hit than the + * non-coherence is a performance gain, so we now just make "read + * mostly" and "write once" be synonyms. We keep the attribute + * separate in case we change our minds at a future date. 
*/ -#define __write_once __attribute__((__section__(".w1data"))) +#define __write_once __read_mostly #endif /* _ASM_TILE_CACHE_H */ diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index 12fb0fb330e..92ee4c8a4f7 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -20,7 +20,6 @@ /* Keep includes the same across arches. */ #include <linux/mm.h> #include <linux/cache.h> -#include <asm/system.h> #include <arch/icache.h> /* Caches are physically-indexed and so don't need special treatment */ @@ -76,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma, #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy((dst), (src), (len)) -/* - * Invalidate a VA range; pads to L2 cacheline boundaries. - * - * Note that on TILE64, __inv_buffer() actually flushes modified - * cache lines in addition to invalidating them, i.e., it's the - * same as __finv_buffer(). - */ -static inline void __inv_buffer(void *buffer, size_t size) -{ - char *next = (char *)((long)buffer & -L2_CACHE_BYTES); - char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); - while (next < finish) { - __insn_inv(next); - next += CHIP_INV_STRIDE(); - } -} - /* Flush a VA range; pads to L2 cacheline boundaries. */ static inline void __flush_buffer(void *buffer, size_t size) { @@ -116,27 +98,46 @@ static inline void __finv_buffer(void *buffer, size_t size) } -/* Invalidate a VA range, then memory fence. */ -static inline void inv_buffer(void *buffer, size_t size) +/* + * Flush a locally-homecached VA range and wait for the evicted + * cachelines to hit memory. + */ +static inline void flush_buffer_local(void *buffer, size_t size) { - __inv_buffer(buffer, size); + __flush_buffer(buffer, size); mb_incoherent(); } -/* Flush a VA range, then memory fence. 
*/ -static inline void flush_buffer(void *buffer, size_t size) +/* + * Flush and invalidate a locally-homecached VA range and wait for the + * evicted cachelines to hit memory. + */ +static inline void finv_buffer_local(void *buffer, size_t size) { - __flush_buffer(buffer, size); + __finv_buffer(buffer, size); mb_incoherent(); } -/* Flush & invalidate a VA range, then memory fence. */ -static inline void finv_buffer(void *buffer, size_t size) +#ifdef __tilepro__ +/* Invalidate a VA range; pads to L2 cacheline boundaries. */ +static inline void __inv_buffer(void *buffer, size_t size) { - __finv_buffer(buffer, size); - mb_incoherent(); + char *next = (char *)((long)buffer & -L2_CACHE_BYTES); + char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size); + while (next < finish) { + __insn_inv(next); + next += CHIP_INV_STRIDE(); + } } +/* Invalidate a VA range and wait for it to be complete. */ +static inline void inv_buffer(void *buffer, size_t size) +{ + __inv_buffer(buffer, size); + mb(); +} +#endif + /* * Flush and invalidate a VA range that is homed remotely, waiting * until the memory controller holds the flushed values. If "hfh" is @@ -146,4 +147,14 @@ static inline void finv_buffer(void *buffer, size_t size) */ void finv_buffer_remote(void *buffer, size_t size, int hfh); +/* + * On SMP systems, when the scheduler does migration-cost autodetection, + * it needs a way to flush as much of the CPU's caches as possible: + * + * TODO: fill this in! + */ +static inline void sched_cacheflush(void) +{ +} + #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h index a120766c726..b21a2fdec9f 100644 --- a/arch/tile/include/asm/checksum.h +++ b/arch/tile/include/asm/checksum.h @@ -21,4 +21,22 @@ __wsum do_csum(const unsigned char *buff, int len); #define do_csum do_csum +/* + * Return the sum of all the 16-bit subwords in a long. + * This sums two subwords on a 32-bit machine, and four on 64 bits. 
+ * The implementation does two vector adds to capture any overflow. + */ +static inline unsigned int csum_long(unsigned long x) +{ + unsigned long ret; +#ifdef __tilegx__ + ret = __insn_v2sadu(x, 0); + ret = __insn_v2sadu(ret, 0); +#else + ret = __insn_sadh_u(x, 0); + ret = __insn_sadh_u(ret, 0); +#endif + return ret; +} + #endif /* _ASM_TILE_CHECKSUM_H */ diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 00000000000..0ccda3c425b --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,134 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +#include <asm/barrier.h> + +/* Nonexistent functions intended to cause compile errors. */ +extern void __xchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for xchg"); +extern void __cmpxchg_called_with_bad_pointer(void) + __compiletime_error("Bad argument size for cmpxchg"); + +#ifndef __tilegx__ + +/* Note the _atomic_xxx() routines include a final mb(). 
*/ +int _atomic_xchg(int *ptr, int n); +int _atomic_xchg_add(int *v, int i); +int _atomic_xchg_add_unless(int *v, int a, int u); +int _atomic_cmpxchg(int *ptr, int o, int n); +long long _atomic64_xchg(long long *v, long long n); +long long _atomic64_xchg_add(long long *v, long long i); +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u); +long long _atomic64_cmpxchg(long long *v, long long o, long long n); + +#define xchg(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 4) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \ + (int)n); \ + }) + +#define xchg64(ptr, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __xchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \ + (long long)(n)); \ + }) + +#define cmpxchg64(ptr, o, n) \ + ({ \ + if (sizeof(*(ptr)) != 8) \ + __cmpxchg_called_with_bad_pointer(); \ + smp_mb(); \ + (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \ + (long long)o, (long long)n); \ + }) + +#else + +#define xchg(ptr, n) \ + ({ \ + typeof(*(ptr)) __x; \ + smp_mb(); \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(unsigned long) \ + __insn_exch4((ptr), \ + (u32)(unsigned long)(n)); \ + break; \ + case 8: \ + __x = (typeof(__x)) \ + __insn_exch((ptr), (unsigned long)(n)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + break; \ + } \ + smp_mb(); \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \ + smp_mb(); \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(unsigned long) \ + __insn_cmpexch4((ptr), \ + (u32)(unsigned long)(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))__insn_cmpexch((ptr), \ + (long long)(n)); \ 
+ break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + smp_mb(); \ + __x; \ + }) + +#define xchg64 xchg +#define cmpxchg64 cmpxchg + +#endif + +#define tas(ptr) xchg((ptr), 1) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index c3ae570c0a5..ffd4493efc7 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; @@ -111,6 +110,68 @@ struct compat_flock64 { typedef u32 compat_sigset_word; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +#define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3) + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[COMPAT_SI_PAD_SIZE]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting 
insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + #define COMPAT_OFF_T_MAX 0x7fffffff #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL @@ -211,57 +272,26 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka, struct pt_regs *regs); /* Compat syscalls. */ -struct compat_sigaction; struct compat_siginfo; struct compat_sigaltstack; -long compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, struct pt_regs *); -long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, - struct compat_sigaction __user *oact, - size_t sigsetsize); -long compat_sys_rt_sigqueueinfo(int pid, int sig, - struct compat_siginfo __user *uinfo); -long compat_sys_rt_sigreturn(struct pt_regs *); -long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *); +long compat_sys_rt_sigreturn(void); long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high); long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high); long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, u32 dummy, u32 low, u32 high); long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, u32 dummy, u32 low, u32 high); -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len); long compat_sys_sync_file_range2(int fd, unsigned int flags, u32 offset_lo, u32 offset_hi, u32 nbytes_lo, u32 nbytes_hi); long compat_sys_fallocate(int fd, int mode, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi); -long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval); - -/* Versions of compat functions that differ from generic Linux. 
*/ -struct compat_msgbuf; -long tile_compat_sys_msgsnd(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, int msgflg); -long tile_compat_sys_msgrcv(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, long msgtyp, int msgflg); -long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data); - -/* Tilera Linux syscalls that don't have "compat" versions. */ -#define compat_sys_flush_cache sys_flush_cache - -/* These are the intvec_64.S trampolines. */ -long _compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); -long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr); +long compat_sys_llseek(unsigned int fd, unsigned int offset_high, + unsigned int offset_low, loff_t __user * result, + unsigned int origin); + +/* Assembly trampoline to avoid clobbering r0. */ long _compat_sys_rt_sigreturn(void); #endif /* _ASM_TILE_COMPAT_H */ diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea..00000000000 --- a/arch/tile/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/cputime.h> diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h index f0a4c256403..6ab8bf146d4 100644 --- a/arch/tile/include/asm/device.h +++ b/arch/tile/include/asm/device.h @@ -1 +1,36 @@ -#include <asm-generic/device.h> +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * Arch specific extensions to struct device + */ + +#ifndef _ASM_TILE_DEVICE_H +#define _ASM_TILE_DEVICE_H + +struct dev_archdata { + /* DMA operations on that device */ + struct dma_map_ops *dma_ops; + + /* Offset of the DMA address from the PA. */ + dma_addr_t dma_offset; + + /* + * Highest DMA address that can be generated by devices that + * have limited DMA capability, i.e. non 64-bit capable. + */ + dma_addr_t max_direct_dma_addr; +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_TILE_DEVICE_H */ diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h deleted file mode 100644 index 6cd978cefb2..00000000000 --- a/arch/tile/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 15e1dceecc6..1eae359d831 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -20,68 +20,94 @@ #include <linux/cache.h> #include <linux/io.h> -/* - * Note that on x86 and powerpc, there is a "struct dma_mapping_ops" - * that is used for all the DMA operations. For now, we don't have an - * equivalent on tile, because we only have a single way of doing DMA. - * (Tilera bug 7994 to use dma_mapping_ops.) 
- */ +#ifdef __tilegx__ +#define ARCH_HAS_DMA_GET_REQUIRED_MASK +#endif + +extern struct dma_map_ops *tile_dma_map_ops; +extern struct dma_map_ops *gx_pci_dma_map_ops; +extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; +extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + else + return tile_dma_map_ops; +} + +static inline dma_addr_t get_dma_offset(struct device *dev) +{ + return dev->archdata.dma_offset; +} + +static inline void set_dma_offset(struct device *dev, dma_addr_t off) +{ + dev->archdata.dma_offset = off; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + +static inline void dma_mark_clean(void *addr, size_t size) {} + +#include <asm-generic/dma-mapping-common.h> + +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction); -extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction); -extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction); -extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction); -extern dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction); -extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction); -extern void dma_sync_sg_for_cpu(struct device *dev, struct 
scatterlist *sg, - int nelems, enum dma_data_direction); -extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction); - - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t, - enum dma_data_direction); -extern void dma_sync_single_for_device(struct device *, dma_addr_t, - size_t, enum dma_data_direction); -extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_sync_single_range_for_device(struct device *, dma_addr_t, - unsigned long offset, size_t, - enum dma_data_direction); -extern void dma_cache_sync(void *vaddr, size_t, enum dma_data_direction); +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return 0; + debug_dma_mapping_error(dev, dma_addr); + return get_dma_ops(dev)->mapping_error(dev, dma_addr); } static inline int dma_supported(struct device *dev, u64 mask) { - return 1; + return get_dma_ops(dev)->dma_supported(dev, mask); } static inline int dma_set_mask(struct device *dev, u64 mask) { + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + /* + * For PCI devices with 64-bit DMA addressing capability, promote + * the dma_ops to hybrid, with the consistent memory DMA space limited + * to 32-bit. For 32-bit capable devices, limit the streaming DMA + * address range to max_direct_dma_addr. 
+ */ + if (dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) { + if (mask == DMA_BIT_MASK(64) && + dma_ops == gx_legacy_pci_dma_map_ops) + set_dma_ops(dev, gx_hybrid_pci_dma_map_ops); + else if (mask > dev->archdata.max_direct_dma_addr) + mask = dev->archdata.max_direct_dma_addr; + } + if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; @@ -90,4 +116,43 @@ dma_set_mask(struct device *dev, u64 mask) return 0; } +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + void *cpu_addr; + + cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs); + + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + + return cpu_addr; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + + dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); +} + +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) +#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) +#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) + +/* + * dma_alloc_noncoherent() is #defined to return coherent memory, + * so there's no need to do any flushing here. 
+ */ +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ +} + #endif /* _ASM_TILE_DMA_MAPPING_H */ diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index 623a6bb741c..41d9878a968 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -#define EM_TILE64 187 #define EM_TILEPRO 188 #define EM_TILEGX 191 @@ -44,7 +43,11 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #else #define ELF_CLASS ELFCLASS32 #endif +#ifdef __BIG_ENDIAN__ +#define ELF_DATA ELFDATA2MSB +#else #define ELF_DATA ELFDATA2LSB +#endif /* * There seems to be a bug in how compat_binfmt_elf.c works: it @@ -59,6 +62,7 @@ enum { ELF_ARCH = CHIP_ELF_TYPE() }; */ #define elf_check_arch(x) \ ((x)->e_ident[EI_CLASS] == ELF_CLASS && \ + (x)->e_ident[EI_DATA] == ELF_DATA && \ (x)->e_machine == CHIP_ELF_TYPE()) /* The module loader only handles a few relocation types. 
*/ @@ -127,6 +131,15 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ +} while (0) + +struct mm_struct; +extern unsigned long arch_randomize_brk(struct mm_struct *mm); +#define arch_randomize_brk arch_randomize_brk + #ifdef CONFIG_COMPAT #define COMPAT_ELF_PLATFORM "tilegx-m32" @@ -143,6 +156,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define compat_start_thread(regs, ip, usp) do { \ regs->pc = ptr_to_compat_reg((void *)(ip)); \ regs->sp = ptr_to_compat_reg((void *)(usp)); \ + single_step_execve(); \ } while (0) /* @@ -151,12 +165,12 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #undef SET_PERSONALITY #define SET_PERSONALITY(ex) \ do { \ - current->personality = PER_LINUX; \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \ current_thread_info()->status &= ~TS_COMPAT; \ } while (0) #define COMPAT_SET_PERSONALITY(ex) \ do { \ - current->personality = PER_LINUX_32BIT; \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \ current_thread_info()->status |= TS_COMPAT; \ } while (0) @@ -164,4 +178,6 @@ do { \ #endif /* CONFIG_COMPAT */ +#define CORE_DUMP_USE_REGSET + #endif /* _ASM_TILE_ELF_H */ diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9d50b..00000000000 --- a/arch/tile/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/emergency-restart.h> diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h deleted file mode 100644 index 4c82b503d92..00000000000 --- a/arch/tile/include/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h deleted file mode 
100644 index 46ab12db573..00000000000 --- a/arch/tile/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index 51537ff9265..ffe2637aeb3 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -25,9 +25,6 @@ #include <asm/kmap_types.h> #endif -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -45,15 +42,23 @@ * * TLB entries of such buffers will not be flushed across * task switches. - * - * We don't bother with a FIX_HOLE since above the fixmaps - * is unmapped memory in any case. */ enum fixed_addresses { +#ifdef __tilegx__ + /* + * TILEPro has unmapped memory above so the hole isn't needed, + * and in any case the hole pushes us over a single 16MB pmd. + */ + FIX_HOLE, +#endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif +#ifdef __tilegx__ /* see homecache.c */ + FIX_HOMECACHE_BEGIN, + FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1, +#endif __end_of_permanent_fixed_addresses, /* @@ -70,54 +75,12 @@ enum fixed_addresses { #endif }; -extern void __set_fixmap(enum fixed_addresses idx, - unsigned long phys, pgprot_t flags); - -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. 
- */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE) #define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE) -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. 
- */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} - -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include <asm-generic/fixmap.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h index 461459b06d9..13a9bb81a8a 100644 --- a/arch/tile/include/asm/ftrace.h +++ b/arch/tile/include/asm/ftrace.h @@ -15,6 +15,26 @@ #ifndef _ASM_TILE_FTRACE_H #define _ASM_TILE_FTRACE_H -/* empty */ +#ifdef CONFIG_FUNCTION_TRACER + +#define MCOUNT_ADDR ((unsigned long)(__mcount)) +#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void __mcount(void); + +#ifdef CONFIG_DYNAMIC_FTRACE +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* _ASM_TILE_FTRACE_H */ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index d03ec124a59..1a6ef1b69cb 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -28,29 +28,82 @@ #include <linux/futex.h> #include <linux/uaccess.h> #include <linux/errno.h> +#include <asm/atomic.h> -extern struct __get_user futex_set(u32 __user *v, int i); -extern struct __get_user futex_add(u32 __user *v, int n); -extern struct __get_user futex_or(u32 __user *v, int n); -extern struct __get_user futex_andn(u32 __user *v, int n); -extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n); +/* + * Support macros for futex operations. Do not use these macros directly. + * They assume "ret", "val", "oparg", and "uaddr" in the lexical context. + * __futex_cmpxchg() additionally assumes "oldval". 
+ */ + +#ifdef __tilegx__ + +#define __futex_asm(OP) \ + asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %5; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + ".align 8\n" \ + ".quad 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (val), "+m" (*(uaddr)) \ + : "r" (uaddr), "r" (oparg), "i" (-EFAULT)) + +#define __futex_set() __futex_asm(exch4) +#define __futex_add() __futex_asm(fetchadd4) +#define __futex_or() __futex_asm(fetchor4) +#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); }) +#define __futex_cmpxchg() \ + ({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); }) + +#define __futex_xor() \ + ({ \ + u32 oldval, n = oparg; \ + if ((ret = __get_user(oldval, uaddr)) == 0) { \ + do { \ + oparg = oldval ^ n; \ + __futex_cmpxchg(); \ + } while (ret == 0 && oldval != val); \ + } \ + }) + +/* No need to prefetch, since the atomic ops go to the home cache anyway. */ +#define __futex_prolog() -#ifndef __tilegx__ -extern struct __get_user futex_xor(u32 __user *v, int n); #else -static inline struct __get_user futex_xor(u32 __user *uaddr, int n) -{ - struct __get_user asm_ret = __get_user_4(uaddr); - if (!asm_ret.err) { - int oldval, newval; - do { - oldval = asm_ret.val; - newval = oldval ^ n; - asm_ret = futex_cmpxchg(uaddr, oldval, newval); - } while (asm_ret.err == 0 && oldval != asm_ret.val); + +#define __futex_call(FN) \ + { \ + struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \ + val = gu.val; \ + ret = gu.err; \ } - return asm_ret; -} + +#define __futex_set() __futex_call(__atomic_xchg) +#define __futex_add() __futex_call(__atomic_xchg_add) +#define __futex_or() __futex_call(__atomic_or) +#define __futex_andn() __futex_call(__atomic_andn) +#define __futex_xor() __futex_call(__atomic_xor) + +#define __futex_cmpxchg() \ + { \ + struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ + val = gu.val; \ + ret = gu.err; \ + } + 
+/* + * Find the lock pointer for the atomic calls to use, and issue a + * prefetch to the user address to bring it into cache. Similar to + * __atomic_setup(), but we can't do a read into the L1 since it might + * fault; instead we do a prefetch into the L2. + */ +#define __futex_prolog() \ + int *lock; \ + __insn_prefetch(uaddr); \ + lock = __atomic_hashed_lock((int __force *)uaddr) #endif static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) @@ -59,8 +112,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int ret; - struct __get_user asm_ret; + int uninitialized_var(val), ret; + + __futex_prolog(); + + /* The 32-bit futex code makes this assumption, so validate it here. */ + BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -71,46 +128,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) pagefault_disable(); switch (op) { case FUTEX_OP_SET: - asm_ret = futex_set(uaddr, oparg); + __futex_set(); break; case FUTEX_OP_ADD: - asm_ret = futex_add(uaddr, oparg); + __futex_add(); break; case FUTEX_OP_OR: - asm_ret = futex_or(uaddr, oparg); + __futex_or(); break; case FUTEX_OP_ANDN: - asm_ret = futex_andn(uaddr, oparg); + __futex_andn(); break; case FUTEX_OP_XOR: - asm_ret = futex_xor(uaddr, oparg); + __futex_xor(); break; default: - asm_ret.err = -ENOSYS; + ret = -ENOSYS; + break; } pagefault_enable(); - ret = asm_ret.err; - if (!ret) { switch (cmp) { case FUTEX_OP_CMP_EQ: - ret = (asm_ret.val == cmparg); + ret = (val == cmparg); break; case FUTEX_OP_CMP_NE: - ret = (asm_ret.val != cmparg); + ret = (val != cmparg); break; case FUTEX_OP_CMP_LT: - ret = (asm_ret.val < cmparg); + ret = (val < cmparg); break; case FUTEX_OP_CMP_GE: - ret = (asm_ret.val >= cmparg); + ret = (val >= cmparg); break; case FUTEX_OP_CMP_LE: 
- ret = (asm_ret.val <= cmparg); + ret = (val <= cmparg); break; case FUTEX_OP_CMP_GT: - ret = (asm_ret.val > cmparg); + ret = (val > cmparg); break; default: ret = -ENOSYS; @@ -120,22 +176,20 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) + u32 oldval, u32 oparg) { - struct __get_user asm_ret; + int ret, val; + + __futex_prolog(); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm_ret = futex_cmpxchg(uaddr, oldval, newval); - *uval = asm_ret.val; - return asm_ret.err; -} + __futex_cmpxchg(); -#ifndef __tilegx__ -/* Return failure from the atomic wrappers. */ -struct __get_user __atomic_bad_address(int __user *addr); -#endif + *uval = val; + return ret; +} #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h index 822390f9a15..54110af2398 100644 --- a/arch/tile/include/asm/hardirq.h +++ b/arch/tile/include/asm/hardirq.h @@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ -#define HARDIRQ_BITS 8 - #endif /* _ASM_TILE_HARDIRQ_H */ diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h index 0bed3ec7b42..2f572b6b7bc 100644 --- a/arch/tile/include/asm/hardwall.h +++ b/arch/tile/include/asm/hardwall.h @@ -11,46 +11,20 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. * - * Provide methods for the HARDWALL_FILE for accessing the UDN. + * Provide methods for access control of per-cpu resources like + * UDN, IDN, or IPI. */ - #ifndef _ASM_TILE_HARDWALL_H #define _ASM_TILE_HARDWALL_H -#include <linux/ioctl.h> - -#define HARDWALL_IOCTL_BASE 0xa2 - -/* - * The HARDWALL_CREATE() ioctl is a macro with a "size" argument. 
- * The resulting ioctl value is passed to the kernel in conjunction - * with a pointer to a little-endian bitmask of cpus, which must be - * physically in a rectangular configuration on the chip. - * The "size" is the number of bytes of cpu mask data. - */ -#define _HARDWALL_CREATE 1 -#define HARDWALL_CREATE(size) \ - _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size)) - -#define _HARDWALL_ACTIVATE 2 -#define HARDWALL_ACTIVATE \ - _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE) - -#define _HARDWALL_DEACTIVATE 3 -#define HARDWALL_DEACTIVATE \ - _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) - -#ifndef __KERNEL__ - -/* This is the canonical name expected by userspace. */ -#define HARDWALL_FILE "/dev/hardwall" +#include <uapi/asm/hardwall.h> +/* /proc hooks for hardwall. */ +struct proc_dir_entry; +#ifdef CONFIG_HARDWALL +void proc_tile_hardwall_init(struct proc_dir_entry *root); +int proc_pid_hardwall(struct task_struct *task, char *buffer); #else - -/* Hook for /proc/tile/hardwall. */ -struct seq_file; -int proc_tile_hardwall_show(struct seq_file *sf, void *v); - +static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} #endif - #endif /* _ASM_TILE_HARDWALL_H */ diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index b2a6c5de79a..fc8429a31c8 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -59,7 +59,7 @@ void *kmap_fix_kpte(struct page *page, int finished); /* This macro is used only in map_new_virtual() to map "page". 
*/ #define kmap_prot page_to_kpgprot(page) -void *__kmap_atomic(struct page *page); +void *kmap_atomic(struct page *page); void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h index a8243865d49..7ddd1b8d691 100644 --- a/arch/tile/include/asm/homecache.h +++ b/arch/tile/include/asm/homecache.h @@ -33,8 +33,7 @@ struct zone; /* * Is this page immutable (unwritable) and thus able to be cached more - * widely than would otherwise be possible? On tile64 this means we - * mark the PTE to cache locally; on tilepro it means we have "nc" set. + * widely than would otherwise be possible? This means we have "nc" set. */ #define PAGE_HOME_IMMUTABLE -2 @@ -44,16 +43,8 @@ struct zone; */ #define PAGE_HOME_INCOHERENT -3 -#if CHIP_HAS_CBOX_HOME_MAP() /* Home for the page is distributed via hash-for-home. */ #define PAGE_HOME_HASH -4 -#endif - -/* Homing is unknown or unspecified. Not valid for page_home(). */ -#define PAGE_HOME_UNKNOWN -5 - -/* Home on the current cpu. Not valid for page_home(). */ -#define PAGE_HOME_HERE -6 /* Support wrapper to use instead of explicit hv_flush_remote(). */ extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length, @@ -79,10 +70,17 @@ extern void homecache_change_page_home(struct page *, int order, int home); /* * Flush a page out of whatever cache(s) it is in. * This is more than just finv, since it properly handles waiting - * for the data to reach memory on tilepro, but it can be quite - * heavyweight, particularly on hash-for-home memory. + * for the data to reach memory, but it can be quite + * heavyweight, particularly on incoherent or immutable memory. + */ +extern void homecache_finv_page(struct page *); + +/* + * Flush a page out of the specified home cache. 
+ * Note that the specified home need not be the actual home of the page, + * as for example might be the case when coordinating with I/O devices. */ -extern void homecache_flush_cache(struct page *, int order); +extern void homecache_finv_map_page(struct page *, int home); /* * Allocate a page with the given GFP flags, home, and optionally @@ -104,10 +102,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, * routines use homecache_change_page_home() to reset the home * back to the default before returning the page to the allocator. */ +void __homecache_free_pages(struct page *, unsigned int order); void homecache_free_pages(unsigned long addr, unsigned int order); -#define homecache_free_page(page) \ - homecache_free_pages((page), 0) - +#define __homecache_free_page(page) __homecache_free_pages((page), 0) +#define homecache_free_page(page) homecache_free_pages((page), 0) /* diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index d396d180516..3257733003f 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -16,6 +16,7 @@ #define _ASM_TILE_HUGETLB_H #include <asm/page.h> +#include <asm-generic/hugetlb.h> static inline int is_hugepage_only_range(struct mm_struct *mm, @@ -106,4 +107,29 @@ static inline void arch_release_hugepage(struct page *page) { } +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + +#ifdef CONFIG_HUGETLB_SUPER_PAGES +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) + entry = pte_mksuper(entry); + return entry; +} +#define arch_make_huge_pte arch_make_huge_pte + +/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. 
*/ +enum { + HUGE_SHIFT_PGDIR = 0, + HUGE_SHIFT_PMD = 1, + HUGE_SHIFT_PAGE = 2, + HUGE_SHIFT_ENTRIES +}; +extern int huge_shift[HUGE_SHIFT_ENTRIES]; +#endif + #endif /* _ASM_TILE_HUGETLB_H */ diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index d3cbb9b14cb..9fe434969fa 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -19,7 +19,8 @@ #include <linux/bug.h> #include <asm/page.h> -#define IO_SPACE_LIMIT 0xfffffffful +/* Maximum PCI I/O space address supported. */ +#define IO_SPACE_LIMIT 0xffffffff /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem @@ -52,6 +53,7 @@ extern void iounmap(volatile void __iomem *addr); #endif #define ioremap_nocache(physaddr, size) ioremap(physaddr, size) +#define ioremap_wc(physaddr, size) ioremap(physaddr, size) #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) @@ -61,6 +63,92 @@ extern void iounmap(volatile void __iomem *addr); #define mm_ptov(addr) ((void *)phys_to_virt(addr)) #define mm_vtop(addr) ((unsigned long)virt_to_phys(addr)) +#if CHIP_HAS_MMIO() + +/* + * We use inline assembly to guarantee that the compiler does not + * split an access into multiple byte-sized accesses as it might + * sometimes do if a register data structure is marked "packed". + * Obviously on tile we can't tolerate such an access being + * actually unaligned, but we want to avoid the case where the + * compiler conservatively would generate multiple accesses even + * for an aligned read or write. 
+ */ + +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 ret; + asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le16_to_cpu(ret); +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 ret; + /* Sign-extend to conform to u32 ABI sign-extension convention. */ + asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le32_to_cpu(ret); +} + +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 ret; + asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le64_to_cpu(ret); +} + +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = val; +} + +static inline void __raw_writew(u16 val, volatile void __iomem *addr) +{ + asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val))); +} + +static inline void __raw_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val))); +} + +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +{ + asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val))); +} + +/* + * The on-chip I/O hardware on tilegx is configured with VA=PA for the + * kernel's PA range. The low-level APIs and field names use "va" and + * "void *" nomenclature, to be consistent with the general notion + * that the addresses in question are virtualizable, but in the kernel + * context we are actually manipulating PA values. (In other contexts, + * e.g. access from user space, we do in fact use real virtual addresses + * in the va fields.) To allow readers of the code to understand what's + * happening, we direct their attention to this comment by using the + * following two functions that just duplicate __va() and __pa(). 
+ */ +typedef unsigned long tile_io_addr_t; +static inline tile_io_addr_t va_to_tile_io_addr(void *va) +{ + BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t)); + return __pa(va); +} +static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr) +{ + return __va(tile_io_addr); +} + +#else /* CHIP_HAS_MMIO() */ + #ifdef CONFIG_PCI extern u8 _tile_readb(unsigned long addr); @@ -72,10 +160,19 @@ extern void _tile_writew(u16 val, unsigned long addr); extern void _tile_writel(u32 val, unsigned long addr); extern void _tile_writeq(u64 val, unsigned long addr); -#else +#define __raw_readb(addr) _tile_readb((unsigned long)addr) +#define __raw_readw(addr) _tile_readw((unsigned long)addr) +#define __raw_readl(addr) _tile_readl((unsigned long)addr) +#define __raw_readq(addr) _tile_readq((unsigned long)addr) +#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr) +#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr) +#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr) +#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr) + +#else /* CONFIG_PCI */ /* - * The Tile architecture does not support IOMEM unless PCI is enabled. + * The tilepro architecture does not support IOMEM unless PCI is enabled. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. 
@@ -87,65 +184,58 @@ static inline int iomem_panic(void) return 0; } -static inline u8 _tile_readb(unsigned long addr) +static inline u8 readb(unsigned long addr) { return iomem_panic(); } -static inline u16 _tile_readw(unsigned long addr) +static inline u16 _readw(unsigned long addr) { return iomem_panic(); } -static inline u32 _tile_readl(unsigned long addr) +static inline u32 readl(unsigned long addr) { return iomem_panic(); } -static inline u64 _tile_readq(unsigned long addr) +static inline u64 readq(unsigned long addr) { return iomem_panic(); } -static inline void _tile_writeb(u8 val, unsigned long addr) +static inline void writeb(u8 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writew(u16 val, unsigned long addr) +static inline void writew(u16 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writel(u32 val, unsigned long addr) +static inline void writel(u32 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writeq(u64 val, unsigned long addr) +static inline void writeq(u64 val, unsigned long addr) { iomem_panic(); } -#endif +#endif /* CONFIG_PCI */ -#define readb(addr) _tile_readb((unsigned long)addr) -#define readw(addr) _tile_readw((unsigned long)addr) -#define readl(addr) _tile_readl((unsigned long)addr) -#define readq(addr) _tile_readq((unsigned long)addr) -#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr) -#define writew(val, addr) _tile_writew(val, (unsigned long)addr) -#define writel(val, addr) _tile_writel(val, (unsigned long)addr) -#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr) - -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq +#endif /* CHIP_HAS_MMIO() */ + +#define readb __raw_readb +#define readw __raw_readw +#define readl __raw_readl +#define readq __raw_readq +#define writeb 
__raw_writeb +#define writew __raw_writew +#define writel __raw_writel +#define writeq __raw_writeq #define readb_relaxed readb #define readw_relaxed readw @@ -161,10 +251,21 @@ static inline void _tile_writeq(u64 val, unsigned long addr) #define iowrite32 writel #define iowrite64 writeq +#if CHIP_HAS_MMIO() || defined(CONFIG_PCI) + +static inline void memset_io(volatile void *dst, int val, size_t len) +{ + size_t x; + BUG_ON((unsigned long)dst & 0x3); + val = (val & 0xff) * 0x01010101; + for (x = 0; x < len; x += 4) + writel(val, dst + x); +} + static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)src & 0x3); for (x = 0; x < len; x += 4) *(u32 *)(dst + x) = readl(src + x); @@ -173,14 +274,116 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t len) { - int x; + size_t x; BUG_ON((unsigned long)dst & 0x3); for (x = 0; x < len; x += 4) writel(*(u32 *)(src + x), dst + x); } +#endif + +#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) + +static inline u8 inb(unsigned long addr) +{ + return readb((volatile void __iomem *) addr); +} + +static inline u16 inw(unsigned long addr) +{ + return readw((volatile void __iomem *) addr); +} + +static inline u32 inl(unsigned long addr) +{ + return readl((volatile void __iomem *) addr); +} + +static inline void outb(u8 b, unsigned long addr) +{ + writeb(b, (volatile void __iomem *) addr); +} + +static inline void outw(u16 b, unsigned long addr) +{ + writew(b, (volatile void __iomem *) addr); +} + +static inline void outl(u32 b, unsigned long addr) +{ + writel(b, (volatile void __iomem *) addr); +} + +static inline void insb(unsigned long addr, void *buffer, int count) +{ + if (count) { + u8 *buf = buffer; + do { + u8 x = inb(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insw(unsigned long addr, void *buffer, int count) 
+{ + if (count) { + u16 *buf = buffer; + do { + u16 x = inw(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void insl(unsigned long addr, void *buffer, int count) +{ + if (count) { + u32 *buf = buffer; + do { + u32 x = inl(addr); + *buf++ = x; + } while (--count); + } +} + +static inline void outsb(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u8 *buf = buffer; + do { + outb(*buf++, addr); + } while (--count); + } +} + +static inline void outsw(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u16 *buf = buffer; + do { + outw(*buf++, addr); + } while (--count); + } +} + +static inline void outsl(unsigned long addr, const void *buffer, int count) +{ + if (count) { + const u32 *buf = buffer; + do { + outl(*buf++, addr); + } while (--count); + } +} + +extern void __iomem *ioport_map(unsigned long port, unsigned int len); +extern void ioport_unmap(void __iomem *addr); + +#else + /* - * The Tile architecture does not support IOPORT, even with PCI. + * The TilePro architecture does not support IOPORT, even with PCI. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. @@ -188,13 +391,19 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, static inline long ioport_panic(void) { +#ifdef __tilegx__ + panic("PCI IO space support is disabled. 
Configure the kernel with" + " CONFIG_TILE_PCI_IO to enable it"); +#else panic("inb/outb and friends do not exist on tile"); +#endif return 0; } static inline void __iomem *ioport_map(unsigned long port, unsigned int len) { - return (void __iomem *) ioport_panic(); + pr_info("ioport_map: mapping IO resources is unsupported on tile.\n"); + return NULL; } static inline void ioport_unmap(void __iomem *addr) @@ -232,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr) ioport_panic(); } -#define inb_p(addr) inb(addr) -#define inw_p(addr) inw(addr) -#define inl_p(addr) inl(addr) -#define outb_p(x, addr) outb((x), (addr)) -#define outw_p(x, addr) outw((x), (addr)) -#define outl_p(x, addr) outl((x), (addr)) - static inline void insb(unsigned long addr, void *buffer, int count) { ioport_panic(); @@ -269,6 +471,20 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) ioport_panic(); } +#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */ + +#define inb_p(addr) inb(addr) +#define inw_p(addr) inw(addr) +#define inl_p(addr) inl(addr) +#define outb_p(x, addr) outb((x), (addr)) +#define outw_p(x, addr) outw((x), (addr)) +#define outl_p(x, addr) outl((x), (addr)) + +#define ioread16be(addr) be16_to_cpu(ioread16(addr)) +#define ioread32be(addr) be32_to_cpu(ioread32(addr)) +#define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr)) +#define iowrite32be(v, addr) iowrite32(be32_to_cpu(v), (addr)) + #define ioread8_rep(p, dst, count) \ insb((unsigned long) (p), (dst), (count)) #define ioread16_rep(p, dst, count) \ @@ -283,4 +499,7 @@ static inline void outsl(unsigned long addr, const void *buffer, int count) #define iowrite32_rep(p, src, count) \ outsl((unsigned long) (p), (src), (count)) +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + #endif /* _ASM_TILE_IO_H */ diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe..00000000000 --- 
a/arch/tile/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h deleted file mode 100644 index ec34c760665..00000000000 --- a/arch/tile/include/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3..00000000000 --- a/arch/tile/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d..00000000000 --- a/arch/tile/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index 572fd3ef1d7..1fe86911838 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -18,10 +18,14 @@ #include <linux/hardirq.h> /* The hypervisor interface provides 32 IRQs. */ -#define NR_IRQS 32 +#define NR_IRQS 32 /* IRQ numbers used for linux IPIs. */ -#define IRQ_RESCHEDULE 1 +#define IRQ_RESCHEDULE 0 +/* Interrupts for dynamic allocation start at 1. Let the core allocate irq0 */ +#define NR_IRQS_LEGACY 1 + +#define irq_canonicalize(irq) (irq) void ack_bad_irq(unsigned int irq); @@ -72,16 +76,6 @@ enum { */ void tile_irq_activate(unsigned int irq, int tile_irq_type); -/* - * For onboard, non-PCI (e.g. TILE_IRQ_PERCPU) devices, drivers know - * how to use enable/disable_percpu_irq() to manage interrupts on each - * core. We can't use the generic enable/disable_irq() because they - * use a single reference count per irq, rather than per cpu per irq. 
- */ -void enable_percpu_irq(unsigned int irq); -void disable_percpu_irq(unsigned int irq); - - void setup_irq_regs(void); #endif /* _ASM_TILE_IRQ_H */ diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b7027..00000000000 --- a/arch/tile/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 5db0ce54284..71af5747874 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -18,32 +18,20 @@ #include <arch/interrupts.h> #include <arch/chip.h> -#if !defined(__tilegx__) && defined(__ASSEMBLY__) - /* * The set of interrupts we want to allow when interrupts are nominally * disabled. The remainder are effectively "NMI" interrupts from * the point of view of the generic Linux code. Note that synchronous * interrupts (aka "non-queued") are not blocked by the mask in any case. */ -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT))) -#else -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT))) -#endif - -#else - -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) -#else #define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT))) -#endif + (~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT))) +#if CHIP_HAS_SPLIT_INTR_MASK() +/* The same macro, but for the two 32-bit SPRs separately. */ +#define LINUX_MASKABLE_INTERRUPTS_LO (-1) +#define LINUX_MASKABLE_INTERRUPTS_HI \ + (~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32)))) #endif #ifndef __ASSEMBLY__ @@ -52,7 +40,15 @@ #include <asm/percpu.h> #include <arch/spr_def.h> -/* Set and clear kernel interrupt masks. */ +/* + * Set and clear kernel interrupt masks. 
+ * + * NOTE: __insn_mtspr() is a compiler builtin marked as a memory + * clobber. We rely on it being equivalent to a compiler barrier in + * this code since arch_local_irq_save() and friends must act as + * compiler barriers. This compiler semantic is baked into enough + * places that the compiler will maintain it going forward. + */ #if CHIP_HAS_SPLIT_INTR_MASK() #if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 # error Fix assumptions about which word various interrupts are in @@ -90,6 +86,14 @@ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \ } while (0) +#define interrupt_mask_save_mask() \ + (__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \ + (((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32)) +#define interrupt_mask_restore_mask(mask) do { \ + unsigned long long __m = (mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \ +} while (0) #else #define interrupt_mask_set(n) \ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n))) @@ -101,6 +105,10 @@ __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask)) #define interrupt_mask_reset_mask(mask) \ __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask)) +#define interrupt_mask_save_mask() \ + __insn_mfspr(SPR_INTERRUPT_MASK_K) +#define interrupt_mask_restore_mask(mask) \ + __insn_mtspr(SPR_INTERRUPT_MASK_K, (mask)) #endif /* @@ -114,7 +122,13 @@ * to know our current state. */ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); -#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR) +#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR) + +#ifdef CONFIG_DEBUG_PREEMPT +/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */ +extern unsigned int debug_smp_processor_id(void); +# define smp_processor_id() debug_smp_processor_id() +#endif /* Disable interrupts. 
*/ #define arch_local_irq_disable() \ @@ -122,11 +136,20 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Disable all interrupts, including NMIs. */ #define arch_local_irq_disable_all() \ - interrupt_mask_set_mask(-1UL) + interrupt_mask_set_mask(-1ULL) + +/* + * Read the set of maskable interrupts. + * We avoid the preemption warning here via __this_cpu_ptr since even + * if irqs are already enabled, it's harmless to read the wrong cpu's + * enabled mask. + */ +#define arch_local_irqs_enabled() \ + (*__this_cpu_ptr(&interrupts_enabled_mask)) /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ - interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask)) + interrupt_mask_reset_mask(arch_local_irqs_enabled()) /* Disable or enable interrupts based on flag argument. */ #define arch_local_irq_restore(disabled) do { \ @@ -153,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Prevent the given interrupt from being enabled next time we enable irqs. */ #define arch_local_irq_mask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt)) + this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt))) /* Prevent the given interrupt from being enabled immediately. */ #define arch_local_irq_mask_now(interrupt) do { \ @@ -163,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Allow the given interrupt to be enabled next time we enable irqs. */ #define arch_local_irq_unmask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt)) + this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt))) /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. 
*/ #define arch_local_irq_unmask_now(interrupt) do { \ @@ -179,7 +202,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); #ifdef __tilegx__ #if INT_MEM_ERROR != 0 -# error Fix IRQ_DISABLED() macro +# error Fix IRQS_DISABLED() macro #endif /* Return 0 or 1 to indicate whether interrupts are currently disabled. */ @@ -207,9 +230,10 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); mtspr SPR_INTERRUPT_MASK_SET_K, tmp /* Enable interrupts. */ -#define IRQ_ENABLE(tmp0, tmp1) \ +#define IRQ_ENABLE_LOAD(tmp0, tmp1) \ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ - ld tmp0, tmp0; \ + ld tmp0, tmp0 +#define IRQ_ENABLE_APPLY(tmp0, tmp1) \ mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0 #else /* !__tilegx__ */ @@ -237,7 +261,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Disable interrupts. */ #define IRQ_DISABLE(tmp0, tmp1) \ { \ - movei tmp0, -1; \ + movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \ moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \ }; \ { \ @@ -253,17 +277,22 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp /* Enable interrupts. */ -#define IRQ_ENABLE(tmp0, tmp1) \ +#define IRQ_ENABLE_LOAD(tmp0, tmp1) \ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ { \ lw tmp0, tmp0; \ addi tmp1, tmp0, 4 \ }; \ - lw tmp1, tmp1; \ + lw tmp1, tmp1 +#define IRQ_ENABLE_APPLY(tmp0, tmp1) \ mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \ mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1 #endif +#define IRQ_ENABLE(tmp0, tmp1) \ + IRQ_ENABLE_LOAD(tmp0, tmp1); \ + IRQ_ENABLE_APPLY(tmp0, tmp1) + /* * Do the CPU's IRQ-state tracing from assembly code. We call a * C function, but almost everywhere we do, we don't mind clobbering diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h index 6ece1b03766..5bbbfa904c2 100644 --- a/arch/tile/include/asm/kdebug.h +++ b/arch/tile/include/asm/kdebug.h @@ -1 +1,28 @@ -#include <asm-generic/kdebug.h> +/* + * Copyright 2012 Tilera Corporation. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KDEBUG_H +#define _ASM_TILE_KDEBUG_H + +#include <linux/notifier.h> + +enum die_val { + DIE_OOPS = 1, + DIE_BREAK, + DIE_SSTEPBP, + DIE_PAGE_FAULT, + DIE_COMPILED_BPT +}; + +#endif /* _ASM_TILE_KDEBUG_H */ diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h index c11a6cc73bb..fc98ccfc98a 100644 --- a/arch/tile/include/asm/kexec.h +++ b/arch/tile/include/asm/kexec.h @@ -19,12 +19,24 @@ #include <asm/page.h> +#ifndef __tilegx__ /* Maximum physical address we can use pages from. */ #define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE /* Maximum address we can reach in physical address mode. */ #define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE /* Maximum address we can use for the control code buffer. */ #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +#else +/* We need to limit the memory below PGDIR_SIZE since + * we only setup page table for [0, PGDIR_SIZE) before final kexec. + */ +/* Maximum physical address we can use pages from. */ +#define KEXEC_SOURCE_MEMORY_LIMIT PGDIR_SIZE +/* Maximum address we can reach in physical address mode. */ +#define KEXEC_DESTINATION_MEMORY_LIMIT PGDIR_SIZE +/* Maximum address we can use for the control code buffer. 
*/ +#define KEXEC_CONTROL_MEMORY_LIMIT PGDIR_SIZE +#endif #define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h new file mode 100644 index 00000000000..280c181cf0d --- /dev/null +++ b/arch/tile/include/asm/kgdb.h @@ -0,0 +1,71 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx KGDB support. + */ + +#ifndef __TILE_KGDB_H__ +#define __TILE_KGDB_H__ + +#include <linux/kdebug.h> +#include <arch/opcode.h> + +#define GDB_SIZEOF_REG sizeof(unsigned long) + +/* + * TILE-Gx gdb is expecting the following register layout: + * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO), + * plus the PC and the faultnum. + * + * Even though kernel not use the 8 special GPRs, they need to be present + * in the registers sent for correct processing in the host-side gdb. + * + */ +#define DBG_MAX_REG_NUM (56+8+2) +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) + +/* + * BUFMAX defines the maximum number of characters in inbound/outbound + * buffers at least NUMREGBYTES*2 are needed for register packets, + * Longer buffer is needed to list all threads. + */ +#define BUFMAX 2048 + +#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES + +/* + * Require cache flush for set/clear a software breakpoint or write memory. + */ +#define CACHE_FLUSH_IS_SAFE 1 + +/* + * The compiled-in breakpoint instruction can be used to "break" into + * the debugger via magic system request key (sysrq-G). 
+ */ +static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT; + +enum tilegx_regnum { + TILEGX_PC_REGNUM = TREG_LAST_GPR + 9, + TILEGX_FAULTNUM_REGNUM, +}; + +/* + * Generate a breakpoint exception to "break" into the debugger. + */ +static inline void arch_kgdb_breakpoint(void) +{ + asm volatile (".quad %0\n\t" + ::""(compiled_bpt)); +} + +#endif /* __TILE_KGDB_H__ */ diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h index 3d0f2024626..92b28e3e997 100644 --- a/arch/tile/include/asm/kmap_types.h +++ b/arch/tile/include/asm/kmap_types.h @@ -23,35 +23,6 @@ * adds 4MB of required address-space. For now we leave KM_TYPE_NR * set to depth 8. */ -enum km_type { - KM_TYPE_NR = 8 -}; - -/* - * We provide dummy definitions of all the stray values that used to be - * required for kmap_atomic() and no longer are. - */ -enum { - KM_BOUNCE_READ, - KM_SKB_SUNRPC_DATA, - KM_SKB_DATA_SOFTIRQ, - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, - KM_IRQ0, - KM_IRQ1, - KM_SOFTIRQ0, - KM_SOFTIRQ1, - KM_SYNC_ICACHE, - KM_SYNC_DCACHE, - KM_UML_USERCOPY, - KM_IRQ_PTE, - KM_NMI, - KM_NMI_PTE, - KM_KDB -}; +#define KM_TYPE_NR 8 #endif /* _ASM_TILE_KMAP_TYPES_H */ diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h new file mode 100644 index 00000000000..d8f9a83943b --- /dev/null +++ b/arch/tile/include/asm/kprobes.h @@ -0,0 +1,79 @@ +/* + * arch/tile/include/asm/kprobes.h + * + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. 
See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_KPROBES_H +#define _ASM_TILE_KPROBES_H + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#include <arch/opcode.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 2 + +#define kretprobe_blacklist_size 0 + +typedef tile_bundle_bits kprobe_opcode_t; + +#define flush_insn_slot(p) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) + +struct kprobe; + +/* Architecture specific copy of original instruction. */ +struct arch_specific_insn { + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long saved_pc; +}; + +#define MAX_JPROBES_STACK_SIZE 128 +#define MAX_JPROBES_STACK_ADDR \ + (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \ + - sizeof(struct pt_regs)) + +#define MIN_JPROBES_STACK_SIZE(ADDR) \ + ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \ + ? MAX_JPROBES_STACK_ADDR - (ADDR) \ + : MAX_JPROBES_STACK_SIZE) + +/* per-cpu kprobe control block. 
*/ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_saved_pc; + unsigned long jprobe_saved_sp; + struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_JPROBES_STACK_SIZE]; +}; + +extern tile_bundle_bits breakpoint2_insn; +extern tile_bundle_bits breakpoint_insn; + +void arch_remove_kprobe(struct kprobe *); + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#endif /* _ASM_TILE_KPROBES_H */ diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h deleted file mode 100644 index c11c530f74d..00000000000 --- a/arch/tile/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h deleted file mode 100644 index 359949be28c..00000000000 --- a/arch/tile/include/asm/memprof.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * The hypervisor's memory controller profiling infrastructure allows - * the programmer to find out what fraction of the available memory - * bandwidth is being consumed at each memory controller. The - * profiler provides start, stop, and clear operations to allows - * profiling over a specific time window, as well as an interface for - * reading the most recent profile values. - * - * This header declares IOCTL codes necessary to control memprof. 
- */ -#ifndef _ASM_TILE_MEMPROF_H -#define _ASM_TILE_MEMPROF_H - -#include <linux/ioctl.h> - -#define MEMPROF_IOCTL_TYPE 0xB4 -#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0) -#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1) -#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2) - -#endif /* _ASM_TILE_MEMPROF_H */ diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h index 92f94c77b6e..0cab1182bde 100644 --- a/arch/tile/include/asm/mmu.h +++ b/arch/tile/include/asm/mmu.h @@ -21,7 +21,8 @@ struct mm_context { * Written under the mmap_sem semaphore; read without the * semaphore but atomically, but it is conservatively set. */ - unsigned int priority_cached; + unsigned long priority_cached; + unsigned long vdso_base; }; typedef struct mm_context mm_context_t; diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index 9bc0d0725c2..4734215e2ad 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -30,18 +30,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) return 0; } -/* Note that arch/tile/kernel/head.S also calls hv_install_context() */ +/* + * Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S + * also call hv_install_context(). + */ static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot) { /* FIXME: DIRECTIO should not always be set. FIXME. 
*/ - int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO); + int rc = hv_install_context(__pa(pgdir), prot, asid, + HV_CTX_DIRECTIO | CTX_PAGE_FLAG); if (rc < 0) panic("hv_install_context failed: %d", rc); } static inline void install_page_table(pgd_t *pgdir, int asid) { - pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir); + pte_t *ptep = virt_to_kpte((unsigned long)pgdir); __install_page_table(pgdir, asid, *ptep); } @@ -100,8 +104,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, __get_cpu_var(current_asid) = asid; /* Clear cpu from the old mm, and set it in the new one. */ - cpumask_clear_cpu(cpu, &prev->cpu_vm_mask); - cpumask_set_cpu(cpu, &next->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Re-load page tables */ install_page_table(next->pgd, asid); diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h index c6344c4f32a..804f1098b6c 100644 --- a/arch/tile/include/asm/mmzone.h +++ b/arch/tile/include/asm/mmzone.h @@ -40,20 +40,9 @@ static inline int pfn_to_nid(unsigned long pfn) return highbits_to_node[__pfn_to_highbits(pfn)]; } -/* - * Following are macros that each numa implmentation must define. - */ - -#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) -#define node_end_pfn(nid) \ -({ \ - pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ -}) - #define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr) -static inline int pfn_valid(int pfn) +static inline int pfn_valid(unsigned long pfn) { int nid = pfn_to_nid(pfn); diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h index 1e4b79fe858..44ed07ccd3d 100644 --- a/arch/tile/include/asm/module.h +++ b/arch/tile/include/asm/module.h @@ -1 +1,40 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_MODULE_H +#define _ASM_TILE_MODULE_H + +#include <arch/chip.h> + #include <asm-generic/module.h> + +/* We can't use modules built with different page sizes. */ +#if defined(CONFIG_PAGE_SIZE_16KB) +# define MODULE_PGSZ " 16KB" +#elif defined(CONFIG_PAGE_SIZE_64KB) +# define MODULE_PGSZ " 64KB" +#else +# define MODULE_PGSZ "" +#endif + +/* We don't really support no-SMP so tag if someone tries. */ +#ifdef CONFIG_SMP +#define MODULE_NOSMP "" +#else +#define MODULE_NOSMP " nosmp" +#endif + +#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP + +#endif /* _ASM_TILE_MODULE_H */ diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h deleted file mode 100644 index 809134c644a..00000000000 --- a/arch/tile/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c7..00000000000 --- a/arch/tile/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mutex-dec.h> diff --git a/arch/tile/include/asm/opcode-tile_32.h b/arch/tile/include/asm/opcode-tile_32.h deleted file mode 100644 index eda60ecbae3..00000000000 --- a/arch/tile/include/asm/opcode-tile_32.h +++ /dev/null @@ -1,1506 +0,0 @@ -/* tile.h -- Header file for TILE opcode table - Copyright (C) 2005 Free Software Foundation, Inc. - Contributed by Tilera Corp. 
*/ - -#ifndef opcode_tile_h -#define opcode_tile_h - -typedef unsigned long long tile_bundle_bits; - - -enum -{ - TILE_MAX_OPERANDS = 5 /* mm */ -}; - -typedef enum -{ - TILE_OPC_BPT, - TILE_OPC_INFO, - TILE_OPC_INFOL, - TILE_OPC_J, - TILE_OPC_JAL, - TILE_OPC_MOVE, - TILE_OPC_MOVE_SN, - TILE_OPC_MOVEI, - TILE_OPC_MOVEI_SN, - TILE_OPC_MOVELI, - TILE_OPC_MOVELI_SN, - TILE_OPC_MOVELIS, - TILE_OPC_PREFETCH, - TILE_OPC_RAISE, - TILE_OPC_ADD, - TILE_OPC_ADD_SN, - TILE_OPC_ADDB, - TILE_OPC_ADDB_SN, - TILE_OPC_ADDBS_U, - TILE_OPC_ADDBS_U_SN, - TILE_OPC_ADDH, - TILE_OPC_ADDH_SN, - TILE_OPC_ADDHS, - TILE_OPC_ADDHS_SN, - TILE_OPC_ADDI, - TILE_OPC_ADDI_SN, - TILE_OPC_ADDIB, - TILE_OPC_ADDIB_SN, - TILE_OPC_ADDIH, - TILE_OPC_ADDIH_SN, - TILE_OPC_ADDLI, - TILE_OPC_ADDLI_SN, - TILE_OPC_ADDLIS, - TILE_OPC_ADDS, - TILE_OPC_ADDS_SN, - TILE_OPC_ADIFFB_U, - TILE_OPC_ADIFFB_U_SN, - TILE_OPC_ADIFFH, - TILE_OPC_ADIFFH_SN, - TILE_OPC_AND, - TILE_OPC_AND_SN, - TILE_OPC_ANDI, - TILE_OPC_ANDI_SN, - TILE_OPC_AULI, - TILE_OPC_AVGB_U, - TILE_OPC_AVGB_U_SN, - TILE_OPC_AVGH, - TILE_OPC_AVGH_SN, - TILE_OPC_BBNS, - TILE_OPC_BBNS_SN, - TILE_OPC_BBNST, - TILE_OPC_BBNST_SN, - TILE_OPC_BBS, - TILE_OPC_BBS_SN, - TILE_OPC_BBST, - TILE_OPC_BBST_SN, - TILE_OPC_BGEZ, - TILE_OPC_BGEZ_SN, - TILE_OPC_BGEZT, - TILE_OPC_BGEZT_SN, - TILE_OPC_BGZ, - TILE_OPC_BGZ_SN, - TILE_OPC_BGZT, - TILE_OPC_BGZT_SN, - TILE_OPC_BITX, - TILE_OPC_BITX_SN, - TILE_OPC_BLEZ, - TILE_OPC_BLEZ_SN, - TILE_OPC_BLEZT, - TILE_OPC_BLEZT_SN, - TILE_OPC_BLZ, - TILE_OPC_BLZ_SN, - TILE_OPC_BLZT, - TILE_OPC_BLZT_SN, - TILE_OPC_BNZ, - TILE_OPC_BNZ_SN, - TILE_OPC_BNZT, - TILE_OPC_BNZT_SN, - TILE_OPC_BYTEX, - TILE_OPC_BYTEX_SN, - TILE_OPC_BZ, - TILE_OPC_BZ_SN, - TILE_OPC_BZT, - TILE_OPC_BZT_SN, - TILE_OPC_CLZ, - TILE_OPC_CLZ_SN, - TILE_OPC_CRC32_32, - TILE_OPC_CRC32_32_SN, - TILE_OPC_CRC32_8, - TILE_OPC_CRC32_8_SN, - TILE_OPC_CTZ, - TILE_OPC_CTZ_SN, - TILE_OPC_DRAIN, - TILE_OPC_DTLBPR, - TILE_OPC_DWORD_ALIGN, - TILE_OPC_DWORD_ALIGN_SN, - 
TILE_OPC_FINV, - TILE_OPC_FLUSH, - TILE_OPC_FNOP, - TILE_OPC_ICOH, - TILE_OPC_ILL, - TILE_OPC_INTHB, - TILE_OPC_INTHB_SN, - TILE_OPC_INTHH, - TILE_OPC_INTHH_SN, - TILE_OPC_INTLB, - TILE_OPC_INTLB_SN, - TILE_OPC_INTLH, - TILE_OPC_INTLH_SN, - TILE_OPC_INV, - TILE_OPC_IRET, - TILE_OPC_JALB, - TILE_OPC_JALF, - TILE_OPC_JALR, - TILE_OPC_JALRP, - TILE_OPC_JB, - TILE_OPC_JF, - TILE_OPC_JR, - TILE_OPC_JRP, - TILE_OPC_LB, - TILE_OPC_LB_SN, - TILE_OPC_LB_U, - TILE_OPC_LB_U_SN, - TILE_OPC_LBADD, - TILE_OPC_LBADD_SN, - TILE_OPC_LBADD_U, - TILE_OPC_LBADD_U_SN, - TILE_OPC_LH, - TILE_OPC_LH_SN, - TILE_OPC_LH_U, - TILE_OPC_LH_U_SN, - TILE_OPC_LHADD, - TILE_OPC_LHADD_SN, - TILE_OPC_LHADD_U, - TILE_OPC_LHADD_U_SN, - TILE_OPC_LNK, - TILE_OPC_LNK_SN, - TILE_OPC_LW, - TILE_OPC_LW_SN, - TILE_OPC_LW_NA, - TILE_OPC_LW_NA_SN, - TILE_OPC_LWADD, - TILE_OPC_LWADD_SN, - TILE_OPC_LWADD_NA, - TILE_OPC_LWADD_NA_SN, - TILE_OPC_MAXB_U, - TILE_OPC_MAXB_U_SN, - TILE_OPC_MAXH, - TILE_OPC_MAXH_SN, - TILE_OPC_MAXIB_U, - TILE_OPC_MAXIB_U_SN, - TILE_OPC_MAXIH, - TILE_OPC_MAXIH_SN, - TILE_OPC_MF, - TILE_OPC_MFSPR, - TILE_OPC_MINB_U, - TILE_OPC_MINB_U_SN, - TILE_OPC_MINH, - TILE_OPC_MINH_SN, - TILE_OPC_MINIB_U, - TILE_OPC_MINIB_U_SN, - TILE_OPC_MINIH, - TILE_OPC_MINIH_SN, - TILE_OPC_MM, - TILE_OPC_MNZ, - TILE_OPC_MNZ_SN, - TILE_OPC_MNZB, - TILE_OPC_MNZB_SN, - TILE_OPC_MNZH, - TILE_OPC_MNZH_SN, - TILE_OPC_MTSPR, - TILE_OPC_MULHH_SS, - TILE_OPC_MULHH_SS_SN, - TILE_OPC_MULHH_SU, - TILE_OPC_MULHH_SU_SN, - TILE_OPC_MULHH_UU, - TILE_OPC_MULHH_UU_SN, - TILE_OPC_MULHHA_SS, - TILE_OPC_MULHHA_SS_SN, - TILE_OPC_MULHHA_SU, - TILE_OPC_MULHHA_SU_SN, - TILE_OPC_MULHHA_UU, - TILE_OPC_MULHHA_UU_SN, - TILE_OPC_MULHHSA_UU, - TILE_OPC_MULHHSA_UU_SN, - TILE_OPC_MULHL_SS, - TILE_OPC_MULHL_SS_SN, - TILE_OPC_MULHL_SU, - TILE_OPC_MULHL_SU_SN, - TILE_OPC_MULHL_US, - TILE_OPC_MULHL_US_SN, - TILE_OPC_MULHL_UU, - TILE_OPC_MULHL_UU_SN, - TILE_OPC_MULHLA_SS, - TILE_OPC_MULHLA_SS_SN, - TILE_OPC_MULHLA_SU, - TILE_OPC_MULHLA_SU_SN, - 
TILE_OPC_MULHLA_US, - TILE_OPC_MULHLA_US_SN, - TILE_OPC_MULHLA_UU, - TILE_OPC_MULHLA_UU_SN, - TILE_OPC_MULHLSA_UU, - TILE_OPC_MULHLSA_UU_SN, - TILE_OPC_MULLL_SS, - TILE_OPC_MULLL_SS_SN, - TILE_OPC_MULLL_SU, - TILE_OPC_MULLL_SU_SN, - TILE_OPC_MULLL_UU, - TILE_OPC_MULLL_UU_SN, - TILE_OPC_MULLLA_SS, - TILE_OPC_MULLLA_SS_SN, - TILE_OPC_MULLLA_SU, - TILE_OPC_MULLLA_SU_SN, - TILE_OPC_MULLLA_UU, - TILE_OPC_MULLLA_UU_SN, - TILE_OPC_MULLLSA_UU, - TILE_OPC_MULLLSA_UU_SN, - TILE_OPC_MVNZ, - TILE_OPC_MVNZ_SN, - TILE_OPC_MVZ, - TILE_OPC_MVZ_SN, - TILE_OPC_MZ, - TILE_OPC_MZ_SN, - TILE_OPC_MZB, - TILE_OPC_MZB_SN, - TILE_OPC_MZH, - TILE_OPC_MZH_SN, - TILE_OPC_NAP, - TILE_OPC_NOP, - TILE_OPC_NOR, - TILE_OPC_NOR_SN, - TILE_OPC_OR, - TILE_OPC_OR_SN, - TILE_OPC_ORI, - TILE_OPC_ORI_SN, - TILE_OPC_PACKBS_U, - TILE_OPC_PACKBS_U_SN, - TILE_OPC_PACKHB, - TILE_OPC_PACKHB_SN, - TILE_OPC_PACKHS, - TILE_OPC_PACKHS_SN, - TILE_OPC_PACKLB, - TILE_OPC_PACKLB_SN, - TILE_OPC_PCNT, - TILE_OPC_PCNT_SN, - TILE_OPC_RL, - TILE_OPC_RL_SN, - TILE_OPC_RLI, - TILE_OPC_RLI_SN, - TILE_OPC_S1A, - TILE_OPC_S1A_SN, - TILE_OPC_S2A, - TILE_OPC_S2A_SN, - TILE_OPC_S3A, - TILE_OPC_S3A_SN, - TILE_OPC_SADAB_U, - TILE_OPC_SADAB_U_SN, - TILE_OPC_SADAH, - TILE_OPC_SADAH_SN, - TILE_OPC_SADAH_U, - TILE_OPC_SADAH_U_SN, - TILE_OPC_SADB_U, - TILE_OPC_SADB_U_SN, - TILE_OPC_SADH, - TILE_OPC_SADH_SN, - TILE_OPC_SADH_U, - TILE_OPC_SADH_U_SN, - TILE_OPC_SB, - TILE_OPC_SBADD, - TILE_OPC_SEQ, - TILE_OPC_SEQ_SN, - TILE_OPC_SEQB, - TILE_OPC_SEQB_SN, - TILE_OPC_SEQH, - TILE_OPC_SEQH_SN, - TILE_OPC_SEQI, - TILE_OPC_SEQI_SN, - TILE_OPC_SEQIB, - TILE_OPC_SEQIB_SN, - TILE_OPC_SEQIH, - TILE_OPC_SEQIH_SN, - TILE_OPC_SH, - TILE_OPC_SHADD, - TILE_OPC_SHL, - TILE_OPC_SHL_SN, - TILE_OPC_SHLB, - TILE_OPC_SHLB_SN, - TILE_OPC_SHLH, - TILE_OPC_SHLH_SN, - TILE_OPC_SHLI, - TILE_OPC_SHLI_SN, - TILE_OPC_SHLIB, - TILE_OPC_SHLIB_SN, - TILE_OPC_SHLIH, - TILE_OPC_SHLIH_SN, - TILE_OPC_SHR, - TILE_OPC_SHR_SN, - TILE_OPC_SHRB, - TILE_OPC_SHRB_SN, - 
TILE_OPC_SHRH, - TILE_OPC_SHRH_SN, - TILE_OPC_SHRI, - TILE_OPC_SHRI_SN, - TILE_OPC_SHRIB, - TILE_OPC_SHRIB_SN, - TILE_OPC_SHRIH, - TILE_OPC_SHRIH_SN, - TILE_OPC_SLT, - TILE_OPC_SLT_SN, - TILE_OPC_SLT_U, - TILE_OPC_SLT_U_SN, - TILE_OPC_SLTB, - TILE_OPC_SLTB_SN, - TILE_OPC_SLTB_U, - TILE_OPC_SLTB_U_SN, - TILE_OPC_SLTE, - TILE_OPC_SLTE_SN, - TILE_OPC_SLTE_U, - TILE_OPC_SLTE_U_SN, - TILE_OPC_SLTEB, - TILE_OPC_SLTEB_SN, - TILE_OPC_SLTEB_U, - TILE_OPC_SLTEB_U_SN, - TILE_OPC_SLTEH, - TILE_OPC_SLTEH_SN, - TILE_OPC_SLTEH_U, - TILE_OPC_SLTEH_U_SN, - TILE_OPC_SLTH, - TILE_OPC_SLTH_SN, - TILE_OPC_SLTH_U, - TILE_OPC_SLTH_U_SN, - TILE_OPC_SLTI, - TILE_OPC_SLTI_SN, - TILE_OPC_SLTI_U, - TILE_OPC_SLTI_U_SN, - TILE_OPC_SLTIB, - TILE_OPC_SLTIB_SN, - TILE_OPC_SLTIB_U, - TILE_OPC_SLTIB_U_SN, - TILE_OPC_SLTIH, - TILE_OPC_SLTIH_SN, - TILE_OPC_SLTIH_U, - TILE_OPC_SLTIH_U_SN, - TILE_OPC_SNE, - TILE_OPC_SNE_SN, - TILE_OPC_SNEB, - TILE_OPC_SNEB_SN, - TILE_OPC_SNEH, - TILE_OPC_SNEH_SN, - TILE_OPC_SRA, - TILE_OPC_SRA_SN, - TILE_OPC_SRAB, - TILE_OPC_SRAB_SN, - TILE_OPC_SRAH, - TILE_OPC_SRAH_SN, - TILE_OPC_SRAI, - TILE_OPC_SRAI_SN, - TILE_OPC_SRAIB, - TILE_OPC_SRAIB_SN, - TILE_OPC_SRAIH, - TILE_OPC_SRAIH_SN, - TILE_OPC_SUB, - TILE_OPC_SUB_SN, - TILE_OPC_SUBB, - TILE_OPC_SUBB_SN, - TILE_OPC_SUBBS_U, - TILE_OPC_SUBBS_U_SN, - TILE_OPC_SUBH, - TILE_OPC_SUBH_SN, - TILE_OPC_SUBHS, - TILE_OPC_SUBHS_SN, - TILE_OPC_SUBS, - TILE_OPC_SUBS_SN, - TILE_OPC_SW, - TILE_OPC_SWADD, - TILE_OPC_SWINT0, - TILE_OPC_SWINT1, - TILE_OPC_SWINT2, - TILE_OPC_SWINT3, - TILE_OPC_TBLIDXB0, - TILE_OPC_TBLIDXB0_SN, - TILE_OPC_TBLIDXB1, - TILE_OPC_TBLIDXB1_SN, - TILE_OPC_TBLIDXB2, - TILE_OPC_TBLIDXB2_SN, - TILE_OPC_TBLIDXB3, - TILE_OPC_TBLIDXB3_SN, - TILE_OPC_TNS, - TILE_OPC_TNS_SN, - TILE_OPC_WH64, - TILE_OPC_XOR, - TILE_OPC_XOR_SN, - TILE_OPC_XORI, - TILE_OPC_XORI_SN, - TILE_OPC_NONE -} tile_mnemonic; - -/* 64-bit pattern for a { bpt ; nop } bundle. 
*/ -#define TILE_BPT_BUNDLE 0x400b3cae70166000ULL - - -#define TILE_ELF_MACHINE_CODE EM_TILEPRO - -#define TILE_ELF_NAME "elf32-tilepro" - - -static __inline unsigned int -get_BrOff_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3ff); -} - -static __inline unsigned int -get_BrOff_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000); -} - -static __inline unsigned int -get_BrType_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0xf); -} - -static __inline unsigned int -get_Dest_Imm8_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x0000003f) | - (((unsigned int)(n >> 43)) & 0x000000c0); -} - -static __inline unsigned int -get_Dest_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 2)) & 0x3); -} - -static __inline unsigned int -get_Dest_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3f); -} - -static __inline unsigned int -get_Dest_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x3f); -} - -static __inline unsigned int -get_Dest_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3f); -} - -static __inline unsigned int -get_Dest_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x3f); -} - -static __inline unsigned int -get_Imm16_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xffff); -} - -static __inline unsigned int -get_Imm16_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xffff); -} - -static __inline unsigned int -get_Imm8_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0xff); -} - -static __inline unsigned int -get_Imm8_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xff); -} - -static 
__inline unsigned int -get_Imm8_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xff); -} - -static __inline unsigned int -get_Imm8_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xff); -} - -static __inline unsigned int -get_Imm8_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xff); -} - -static __inline unsigned int -get_ImmOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 20)) & 0x7f); -} - -static __inline unsigned int -get_ImmOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 51)) & 0x7f); -} - -static __inline unsigned int -get_ImmRROpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 8)) & 0x3); -} - -static __inline unsigned int -get_JOffLong_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000) | - (((unsigned int)(n >> 14)) & 0x001e0000) | - (((unsigned int)(n >> 16)) & 0x07e00000) | - (((unsigned int)(n >> 31)) & 0x18000000); -} - -static __inline unsigned int -get_JOff_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000) | - (((unsigned int)(n >> 14)) & 0x001e0000) | - (((unsigned int)(n >> 16)) & 0x07e00000) | - (((unsigned int)(n >> 31)) & 0x08000000); -} - -static __inline unsigned int -get_MF_Imm15_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x00003fff) | - (((unsigned int)(n >> 44)) & 0x00004000); -} - -static __inline unsigned int -get_MMEnd_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 18)) & 0x1f); -} - -static __inline unsigned int -get_MMEnd_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x1f); -} - -static __inline unsigned int -get_MMStart_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n 
>> 23)) & 0x1f); -} - -static __inline unsigned int -get_MMStart_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 54)) & 0x1f); -} - -static __inline unsigned int -get_MT_Imm15_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x0000003f) | - (((unsigned int)(n >> 37)) & 0x00003fc0) | - (((unsigned int)(n >> 44)) & 0x00004000); -} - -static __inline unsigned int -get_Mode(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 63)) & 0x1); -} - -static __inline unsigned int -get_NoRegOpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0xf); -} - -static __inline unsigned int -get_Opcode_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 10)) & 0x3f); -} - -static __inline unsigned int -get_Opcode_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 28)) & 0x7); -} - -static __inline unsigned int -get_Opcode_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 59)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 27)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 59)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y2(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 56)) & 0x7); -} - -static __inline unsigned int -get_RROpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 4)) & 0xf); -} - -static __inline unsigned int -get_RRROpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 18)) & 0x1ff); -} - -static __inline unsigned int -get_RRROpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x1ff); -} - -static __inline unsigned int -get_RRROpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned 
int)num; - return (((n >> 18)) & 0x3); -} - -static __inline unsigned int -get_RRROpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x3); -} - -static __inline unsigned int -get_RouteOpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3ff); -} - -static __inline unsigned int -get_S_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 27)) & 0x1); -} - -static __inline unsigned int -get_S_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 58)) & 0x1); -} - -static __inline unsigned int -get_ShAmt_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_SrcA_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 6)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 6)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y2(tile_bundle_bits n) -{ - return (((n >> 26)) & 0x00000001) | - (((unsigned int)(n >> 50)) & 0x0000003e); -} - -static __inline unsigned int -get_SrcBDest_Y2(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 20)) & 0x3f); -} - -static __inline unsigned int 
-get_SrcB_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x3f); -} - -static __inline unsigned int -get_Src_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3); -} - -static __inline unsigned int -get_UnOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_UnShOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 17)) & 0x3ff); -} - -static __inline unsigned int -get_UnShOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 48)) & 0x3ff); -} - -static __inline unsigned int -get_UnShOpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 17)) & 0x7); -} - -static __inline unsigned int -get_UnShOpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 48)) & 0x7); -} - - -static __inline int -sign_extend(int n, int num_bits) -{ - int shift = (int)(sizeof(int) * 8 - num_bits); - return (n << shift) >> shift; -} - - - -static __inline tile_bundle_bits 
-create_BrOff_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 0); -} - -static __inline tile_bundle_bits -create_BrOff_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20); -} - -static __inline tile_bundle_bits -create_BrType_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 31); -} - -static __inline tile_bundle_bits -create_Dest_Imm8_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x0000003f)) << 31) | - (((tile_bundle_bits)(n & 0x000000c0)) << 43); -} - -static __inline tile_bundle_bits -create_Dest_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 2); -} - -static __inline tile_bundle_bits -create_Dest_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 0); -} - -static __inline tile_bundle_bits -create_Dest_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 31); -} - -static __inline tile_bundle_bits -create_Dest_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 0); -} - -static __inline tile_bundle_bits -create_Dest_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 31); -} - -static __inline tile_bundle_bits -create_Imm16_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xffff) << 12); -} - -static __inline tile_bundle_bits -create_Imm16_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xffff)) << 43); -} - -static __inline tile_bundle_bits -create_Imm8_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xff) << 0); -} - -static __inline tile_bundle_bits -create_Imm8_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return 
((n & 0xff) << 12); -} - -static __inline tile_bundle_bits -create_Imm8_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xff)) << 43); -} - -static __inline tile_bundle_bits -create_Imm8_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xff) << 12); -} - -static __inline tile_bundle_bits -create_Imm8_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xff)) << 43); -} - -static __inline tile_bundle_bits -create_ImmOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7f) << 20); -} - -static __inline tile_bundle_bits -create_ImmOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7f)) << 51); -} - -static __inline tile_bundle_bits -create_ImmRROpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 8); -} - -static __inline tile_bundle_bits -create_JOffLong_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20) | - (((tile_bundle_bits)(n & 0x001e0000)) << 14) | - (((tile_bundle_bits)(n & 0x07e00000)) << 16) | - (((tile_bundle_bits)(n & 0x18000000)) << 31); -} - -static __inline tile_bundle_bits -create_JOff_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20) | - (((tile_bundle_bits)(n & 0x001e0000)) << 14) | - (((tile_bundle_bits)(n & 0x07e00000)) << 16) | - (((tile_bundle_bits)(n & 0x08000000)) << 31); -} - -static __inline tile_bundle_bits -create_MF_Imm15_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00003fff)) << 37) | - (((tile_bundle_bits)(n & 0x00004000)) << 44); -} - -static __inline tile_bundle_bits -create_MMEnd_X0(int num) -{ - const unsigned int n 
= (unsigned int)num; - return ((n & 0x1f) << 18); -} - -static __inline tile_bundle_bits -create_MMEnd_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 49); -} - -static __inline tile_bundle_bits -create_MMStart_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 23); -} - -static __inline tile_bundle_bits -create_MMStart_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 54); -} - -static __inline tile_bundle_bits -create_MT_Imm15_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x0000003f)) << 31) | - (((tile_bundle_bits)(n & 0x00003fc0)) << 37) | - (((tile_bundle_bits)(n & 0x00004000)) << 44); -} - -static __inline tile_bundle_bits -create_Mode(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1)) << 63); -} - -static __inline tile_bundle_bits -create_NoRegOpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 0); -} - -static __inline tile_bundle_bits -create_Opcode_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 10); -} - -static __inline tile_bundle_bits -create_Opcode_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7) << 28); -} - -static __inline tile_bundle_bits -create_Opcode_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 59); -} - -static __inline tile_bundle_bits -create_Opcode_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 27); -} - -static __inline tile_bundle_bits -create_Opcode_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 59); -} - -static __inline tile_bundle_bits -create_Opcode_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7)) << 
56); -} - -static __inline tile_bundle_bits -create_RROpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 4); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1ff) << 18); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1ff)) << 49); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 18); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3)) << 49); -} - -static __inline tile_bundle_bits -create_RouteOpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 0); -} - -static __inline tile_bundle_bits -create_S_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1) << 27); -} - -static __inline tile_bundle_bits -create_S_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1)) << 58); -} - -static __inline tile_bundle_bits -create_ShAmt_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_ShAmt_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_ShAmt_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_ShAmt_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_SrcA_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 
6); -} - -static __inline tile_bundle_bits -create_SrcA_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 37); -} - -static __inline tile_bundle_bits -create_SrcA_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 6); -} - -static __inline tile_bundle_bits -create_SrcA_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 37); -} - -static __inline tile_bundle_bits -create_SrcA_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x00000001) << 26) | - (((tile_bundle_bits)(n & 0x0000003e)) << 50); -} - -static __inline tile_bundle_bits -create_SrcBDest_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 20); -} - -static __inline tile_bundle_bits -create_SrcB_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 12); -} - -static __inline tile_bundle_bits -create_SrcB_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 43); -} - -static __inline tile_bundle_bits -create_SrcB_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 12); -} - -static __inline tile_bundle_bits -create_SrcB_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 43); -} - -static __inline tile_bundle_bits -create_Src_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 0); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return 
((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 17); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3ff)) << 48); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7) << 17); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7)) << 48); -} - - - -typedef enum -{ - TILE_PIPELINE_X0, - TILE_PIPELINE_X1, - TILE_PIPELINE_Y0, - TILE_PIPELINE_Y1, - TILE_PIPELINE_Y2, -} tile_pipeline; - -#define tile_is_x_pipeline(p) ((int)(p) <= (int)TILE_PIPELINE_X1) - -typedef enum -{ - TILE_OP_TYPE_REGISTER, - TILE_OP_TYPE_IMMEDIATE, - TILE_OP_TYPE_ADDRESS, - TILE_OP_TYPE_SPR -} tile_operand_type; - -/* This is the bit that determines if a bundle is in the Y encoding. */ -#define TILE_BUNDLE_Y_ENCODING_MASK ((tile_bundle_bits)1 << 63) - -enum -{ - /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ - TILE_MAX_INSTRUCTIONS_PER_BUNDLE = 3, - - /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ - TILE_NUM_PIPELINE_ENCODINGS = 5, - - /* Log base 2 of TILE_BUNDLE_SIZE_IN_BYTES. */ - TILE_LOG2_BUNDLE_SIZE_IN_BYTES = 3, - - /* Instructions take this many bytes. */ - TILE_BUNDLE_SIZE_IN_BYTES = 1 << TILE_LOG2_BUNDLE_SIZE_IN_BYTES, - - /* Log base 2 of TILE_BUNDLE_ALIGNMENT_IN_BYTES. */ - TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, - - /* Bundles should be aligned modulo this number of bytes. 
*/ - TILE_BUNDLE_ALIGNMENT_IN_BYTES = - (1 << TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), - - /* Log base 2 of TILE_SN_INSTRUCTION_SIZE_IN_BYTES. */ - TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1, - - /* Static network instructions take this many bytes. */ - TILE_SN_INSTRUCTION_SIZE_IN_BYTES = - (1 << TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES), - - /* Number of registers (some are magic, such as network I/O). */ - TILE_NUM_REGISTERS = 64, - - /* Number of static network registers. */ - TILE_NUM_SN_REGISTERS = 4 -}; - - -struct tile_operand -{ - /* Is this operand a register, immediate or address? */ - tile_operand_type type; - - /* The default relocation type for this operand. */ - signed int default_reloc : 16; - - /* How many bits is this value? (used for range checking) */ - unsigned int num_bits : 5; - - /* Is the value signed? (used for range checking) */ - unsigned int is_signed : 1; - - /* Is this operand a source register? */ - unsigned int is_src_reg : 1; - - /* Is this operand written? (i.e. is it a destination register) */ - unsigned int is_dest_reg : 1; - - /* Is this operand PC-relative? */ - unsigned int is_pc_relative : 1; - - /* By how many bits do we right shift the value before inserting? */ - unsigned int rightshift : 2; - - /* Return the bits for this operand to be ORed into an existing bundle. */ - tile_bundle_bits (*insert) (int op); - - /* Extract this operand and return it. */ - unsigned int (*extract) (tile_bundle_bits bundle); -}; - - -extern const struct tile_operand tile_operands[]; - -/* One finite-state machine per pipe for rapid instruction decoding. */ -extern const unsigned short * const -tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS]; - - -struct tile_opcode -{ - /* The opcode mnemonic, e.g. "add" */ - const char *name; - - /* The enum value for this mnemonic. 
*/ - tile_mnemonic mnemonic; - - /* A bit mask of which of the five pipes this instruction - is compatible with: - X0 0x01 - X1 0x02 - Y0 0x04 - Y1 0x08 - Y2 0x10 */ - unsigned char pipes; - - /* How many operands are there? */ - unsigned char num_operands; - - /* Which register does this write implicitly, or TREG_ZERO if none? */ - unsigned char implicitly_written_register; - - /* Can this be bundled with other instructions (almost always true). */ - unsigned char can_bundle; - - /* The description of the operands. Each of these is an - * index into the tile_operands[] table. */ - unsigned char operands[TILE_NUM_PIPELINE_ENCODINGS][TILE_MAX_OPERANDS]; - -}; - -extern const struct tile_opcode tile_opcodes[]; - - -/* Used for non-textual disassembly into structs. */ -struct tile_decoded_instruction -{ - const struct tile_opcode *opcode; - const struct tile_operand *operands[TILE_MAX_OPERANDS]; - int operand_values[TILE_MAX_OPERANDS]; -}; - - -/* Disassemble a bundle into a struct for machine processing. */ -extern int parse_insn_tile(tile_bundle_bits bits, - unsigned int pc, - struct tile_decoded_instruction - decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]); - - - -#endif /* opcode_tile_h */ diff --git a/arch/tile/include/asm/opcode-tile_64.h b/arch/tile/include/asm/opcode-tile_64.h deleted file mode 100644 index eda60ecbae3..00000000000 --- a/arch/tile/include/asm/opcode-tile_64.h +++ /dev/null @@ -1,1506 +0,0 @@ -/* tile.h -- Header file for TILE opcode table - Copyright (C) 2005 Free Software Foundation, Inc. - Contributed by Tilera Corp. 
*/ - -#ifndef opcode_tile_h -#define opcode_tile_h - -typedef unsigned long long tile_bundle_bits; - - -enum -{ - TILE_MAX_OPERANDS = 5 /* mm */ -}; - -typedef enum -{ - TILE_OPC_BPT, - TILE_OPC_INFO, - TILE_OPC_INFOL, - TILE_OPC_J, - TILE_OPC_JAL, - TILE_OPC_MOVE, - TILE_OPC_MOVE_SN, - TILE_OPC_MOVEI, - TILE_OPC_MOVEI_SN, - TILE_OPC_MOVELI, - TILE_OPC_MOVELI_SN, - TILE_OPC_MOVELIS, - TILE_OPC_PREFETCH, - TILE_OPC_RAISE, - TILE_OPC_ADD, - TILE_OPC_ADD_SN, - TILE_OPC_ADDB, - TILE_OPC_ADDB_SN, - TILE_OPC_ADDBS_U, - TILE_OPC_ADDBS_U_SN, - TILE_OPC_ADDH, - TILE_OPC_ADDH_SN, - TILE_OPC_ADDHS, - TILE_OPC_ADDHS_SN, - TILE_OPC_ADDI, - TILE_OPC_ADDI_SN, - TILE_OPC_ADDIB, - TILE_OPC_ADDIB_SN, - TILE_OPC_ADDIH, - TILE_OPC_ADDIH_SN, - TILE_OPC_ADDLI, - TILE_OPC_ADDLI_SN, - TILE_OPC_ADDLIS, - TILE_OPC_ADDS, - TILE_OPC_ADDS_SN, - TILE_OPC_ADIFFB_U, - TILE_OPC_ADIFFB_U_SN, - TILE_OPC_ADIFFH, - TILE_OPC_ADIFFH_SN, - TILE_OPC_AND, - TILE_OPC_AND_SN, - TILE_OPC_ANDI, - TILE_OPC_ANDI_SN, - TILE_OPC_AULI, - TILE_OPC_AVGB_U, - TILE_OPC_AVGB_U_SN, - TILE_OPC_AVGH, - TILE_OPC_AVGH_SN, - TILE_OPC_BBNS, - TILE_OPC_BBNS_SN, - TILE_OPC_BBNST, - TILE_OPC_BBNST_SN, - TILE_OPC_BBS, - TILE_OPC_BBS_SN, - TILE_OPC_BBST, - TILE_OPC_BBST_SN, - TILE_OPC_BGEZ, - TILE_OPC_BGEZ_SN, - TILE_OPC_BGEZT, - TILE_OPC_BGEZT_SN, - TILE_OPC_BGZ, - TILE_OPC_BGZ_SN, - TILE_OPC_BGZT, - TILE_OPC_BGZT_SN, - TILE_OPC_BITX, - TILE_OPC_BITX_SN, - TILE_OPC_BLEZ, - TILE_OPC_BLEZ_SN, - TILE_OPC_BLEZT, - TILE_OPC_BLEZT_SN, - TILE_OPC_BLZ, - TILE_OPC_BLZ_SN, - TILE_OPC_BLZT, - TILE_OPC_BLZT_SN, - TILE_OPC_BNZ, - TILE_OPC_BNZ_SN, - TILE_OPC_BNZT, - TILE_OPC_BNZT_SN, - TILE_OPC_BYTEX, - TILE_OPC_BYTEX_SN, - TILE_OPC_BZ, - TILE_OPC_BZ_SN, - TILE_OPC_BZT, - TILE_OPC_BZT_SN, - TILE_OPC_CLZ, - TILE_OPC_CLZ_SN, - TILE_OPC_CRC32_32, - TILE_OPC_CRC32_32_SN, - TILE_OPC_CRC32_8, - TILE_OPC_CRC32_8_SN, - TILE_OPC_CTZ, - TILE_OPC_CTZ_SN, - TILE_OPC_DRAIN, - TILE_OPC_DTLBPR, - TILE_OPC_DWORD_ALIGN, - TILE_OPC_DWORD_ALIGN_SN, - 
TILE_OPC_FINV, - TILE_OPC_FLUSH, - TILE_OPC_FNOP, - TILE_OPC_ICOH, - TILE_OPC_ILL, - TILE_OPC_INTHB, - TILE_OPC_INTHB_SN, - TILE_OPC_INTHH, - TILE_OPC_INTHH_SN, - TILE_OPC_INTLB, - TILE_OPC_INTLB_SN, - TILE_OPC_INTLH, - TILE_OPC_INTLH_SN, - TILE_OPC_INV, - TILE_OPC_IRET, - TILE_OPC_JALB, - TILE_OPC_JALF, - TILE_OPC_JALR, - TILE_OPC_JALRP, - TILE_OPC_JB, - TILE_OPC_JF, - TILE_OPC_JR, - TILE_OPC_JRP, - TILE_OPC_LB, - TILE_OPC_LB_SN, - TILE_OPC_LB_U, - TILE_OPC_LB_U_SN, - TILE_OPC_LBADD, - TILE_OPC_LBADD_SN, - TILE_OPC_LBADD_U, - TILE_OPC_LBADD_U_SN, - TILE_OPC_LH, - TILE_OPC_LH_SN, - TILE_OPC_LH_U, - TILE_OPC_LH_U_SN, - TILE_OPC_LHADD, - TILE_OPC_LHADD_SN, - TILE_OPC_LHADD_U, - TILE_OPC_LHADD_U_SN, - TILE_OPC_LNK, - TILE_OPC_LNK_SN, - TILE_OPC_LW, - TILE_OPC_LW_SN, - TILE_OPC_LW_NA, - TILE_OPC_LW_NA_SN, - TILE_OPC_LWADD, - TILE_OPC_LWADD_SN, - TILE_OPC_LWADD_NA, - TILE_OPC_LWADD_NA_SN, - TILE_OPC_MAXB_U, - TILE_OPC_MAXB_U_SN, - TILE_OPC_MAXH, - TILE_OPC_MAXH_SN, - TILE_OPC_MAXIB_U, - TILE_OPC_MAXIB_U_SN, - TILE_OPC_MAXIH, - TILE_OPC_MAXIH_SN, - TILE_OPC_MF, - TILE_OPC_MFSPR, - TILE_OPC_MINB_U, - TILE_OPC_MINB_U_SN, - TILE_OPC_MINH, - TILE_OPC_MINH_SN, - TILE_OPC_MINIB_U, - TILE_OPC_MINIB_U_SN, - TILE_OPC_MINIH, - TILE_OPC_MINIH_SN, - TILE_OPC_MM, - TILE_OPC_MNZ, - TILE_OPC_MNZ_SN, - TILE_OPC_MNZB, - TILE_OPC_MNZB_SN, - TILE_OPC_MNZH, - TILE_OPC_MNZH_SN, - TILE_OPC_MTSPR, - TILE_OPC_MULHH_SS, - TILE_OPC_MULHH_SS_SN, - TILE_OPC_MULHH_SU, - TILE_OPC_MULHH_SU_SN, - TILE_OPC_MULHH_UU, - TILE_OPC_MULHH_UU_SN, - TILE_OPC_MULHHA_SS, - TILE_OPC_MULHHA_SS_SN, - TILE_OPC_MULHHA_SU, - TILE_OPC_MULHHA_SU_SN, - TILE_OPC_MULHHA_UU, - TILE_OPC_MULHHA_UU_SN, - TILE_OPC_MULHHSA_UU, - TILE_OPC_MULHHSA_UU_SN, - TILE_OPC_MULHL_SS, - TILE_OPC_MULHL_SS_SN, - TILE_OPC_MULHL_SU, - TILE_OPC_MULHL_SU_SN, - TILE_OPC_MULHL_US, - TILE_OPC_MULHL_US_SN, - TILE_OPC_MULHL_UU, - TILE_OPC_MULHL_UU_SN, - TILE_OPC_MULHLA_SS, - TILE_OPC_MULHLA_SS_SN, - TILE_OPC_MULHLA_SU, - TILE_OPC_MULHLA_SU_SN, - 
TILE_OPC_MULHLA_US, - TILE_OPC_MULHLA_US_SN, - TILE_OPC_MULHLA_UU, - TILE_OPC_MULHLA_UU_SN, - TILE_OPC_MULHLSA_UU, - TILE_OPC_MULHLSA_UU_SN, - TILE_OPC_MULLL_SS, - TILE_OPC_MULLL_SS_SN, - TILE_OPC_MULLL_SU, - TILE_OPC_MULLL_SU_SN, - TILE_OPC_MULLL_UU, - TILE_OPC_MULLL_UU_SN, - TILE_OPC_MULLLA_SS, - TILE_OPC_MULLLA_SS_SN, - TILE_OPC_MULLLA_SU, - TILE_OPC_MULLLA_SU_SN, - TILE_OPC_MULLLA_UU, - TILE_OPC_MULLLA_UU_SN, - TILE_OPC_MULLLSA_UU, - TILE_OPC_MULLLSA_UU_SN, - TILE_OPC_MVNZ, - TILE_OPC_MVNZ_SN, - TILE_OPC_MVZ, - TILE_OPC_MVZ_SN, - TILE_OPC_MZ, - TILE_OPC_MZ_SN, - TILE_OPC_MZB, - TILE_OPC_MZB_SN, - TILE_OPC_MZH, - TILE_OPC_MZH_SN, - TILE_OPC_NAP, - TILE_OPC_NOP, - TILE_OPC_NOR, - TILE_OPC_NOR_SN, - TILE_OPC_OR, - TILE_OPC_OR_SN, - TILE_OPC_ORI, - TILE_OPC_ORI_SN, - TILE_OPC_PACKBS_U, - TILE_OPC_PACKBS_U_SN, - TILE_OPC_PACKHB, - TILE_OPC_PACKHB_SN, - TILE_OPC_PACKHS, - TILE_OPC_PACKHS_SN, - TILE_OPC_PACKLB, - TILE_OPC_PACKLB_SN, - TILE_OPC_PCNT, - TILE_OPC_PCNT_SN, - TILE_OPC_RL, - TILE_OPC_RL_SN, - TILE_OPC_RLI, - TILE_OPC_RLI_SN, - TILE_OPC_S1A, - TILE_OPC_S1A_SN, - TILE_OPC_S2A, - TILE_OPC_S2A_SN, - TILE_OPC_S3A, - TILE_OPC_S3A_SN, - TILE_OPC_SADAB_U, - TILE_OPC_SADAB_U_SN, - TILE_OPC_SADAH, - TILE_OPC_SADAH_SN, - TILE_OPC_SADAH_U, - TILE_OPC_SADAH_U_SN, - TILE_OPC_SADB_U, - TILE_OPC_SADB_U_SN, - TILE_OPC_SADH, - TILE_OPC_SADH_SN, - TILE_OPC_SADH_U, - TILE_OPC_SADH_U_SN, - TILE_OPC_SB, - TILE_OPC_SBADD, - TILE_OPC_SEQ, - TILE_OPC_SEQ_SN, - TILE_OPC_SEQB, - TILE_OPC_SEQB_SN, - TILE_OPC_SEQH, - TILE_OPC_SEQH_SN, - TILE_OPC_SEQI, - TILE_OPC_SEQI_SN, - TILE_OPC_SEQIB, - TILE_OPC_SEQIB_SN, - TILE_OPC_SEQIH, - TILE_OPC_SEQIH_SN, - TILE_OPC_SH, - TILE_OPC_SHADD, - TILE_OPC_SHL, - TILE_OPC_SHL_SN, - TILE_OPC_SHLB, - TILE_OPC_SHLB_SN, - TILE_OPC_SHLH, - TILE_OPC_SHLH_SN, - TILE_OPC_SHLI, - TILE_OPC_SHLI_SN, - TILE_OPC_SHLIB, - TILE_OPC_SHLIB_SN, - TILE_OPC_SHLIH, - TILE_OPC_SHLIH_SN, - TILE_OPC_SHR, - TILE_OPC_SHR_SN, - TILE_OPC_SHRB, - TILE_OPC_SHRB_SN, - 
TILE_OPC_SHRH, - TILE_OPC_SHRH_SN, - TILE_OPC_SHRI, - TILE_OPC_SHRI_SN, - TILE_OPC_SHRIB, - TILE_OPC_SHRIB_SN, - TILE_OPC_SHRIH, - TILE_OPC_SHRIH_SN, - TILE_OPC_SLT, - TILE_OPC_SLT_SN, - TILE_OPC_SLT_U, - TILE_OPC_SLT_U_SN, - TILE_OPC_SLTB, - TILE_OPC_SLTB_SN, - TILE_OPC_SLTB_U, - TILE_OPC_SLTB_U_SN, - TILE_OPC_SLTE, - TILE_OPC_SLTE_SN, - TILE_OPC_SLTE_U, - TILE_OPC_SLTE_U_SN, - TILE_OPC_SLTEB, - TILE_OPC_SLTEB_SN, - TILE_OPC_SLTEB_U, - TILE_OPC_SLTEB_U_SN, - TILE_OPC_SLTEH, - TILE_OPC_SLTEH_SN, - TILE_OPC_SLTEH_U, - TILE_OPC_SLTEH_U_SN, - TILE_OPC_SLTH, - TILE_OPC_SLTH_SN, - TILE_OPC_SLTH_U, - TILE_OPC_SLTH_U_SN, - TILE_OPC_SLTI, - TILE_OPC_SLTI_SN, - TILE_OPC_SLTI_U, - TILE_OPC_SLTI_U_SN, - TILE_OPC_SLTIB, - TILE_OPC_SLTIB_SN, - TILE_OPC_SLTIB_U, - TILE_OPC_SLTIB_U_SN, - TILE_OPC_SLTIH, - TILE_OPC_SLTIH_SN, - TILE_OPC_SLTIH_U, - TILE_OPC_SLTIH_U_SN, - TILE_OPC_SNE, - TILE_OPC_SNE_SN, - TILE_OPC_SNEB, - TILE_OPC_SNEB_SN, - TILE_OPC_SNEH, - TILE_OPC_SNEH_SN, - TILE_OPC_SRA, - TILE_OPC_SRA_SN, - TILE_OPC_SRAB, - TILE_OPC_SRAB_SN, - TILE_OPC_SRAH, - TILE_OPC_SRAH_SN, - TILE_OPC_SRAI, - TILE_OPC_SRAI_SN, - TILE_OPC_SRAIB, - TILE_OPC_SRAIB_SN, - TILE_OPC_SRAIH, - TILE_OPC_SRAIH_SN, - TILE_OPC_SUB, - TILE_OPC_SUB_SN, - TILE_OPC_SUBB, - TILE_OPC_SUBB_SN, - TILE_OPC_SUBBS_U, - TILE_OPC_SUBBS_U_SN, - TILE_OPC_SUBH, - TILE_OPC_SUBH_SN, - TILE_OPC_SUBHS, - TILE_OPC_SUBHS_SN, - TILE_OPC_SUBS, - TILE_OPC_SUBS_SN, - TILE_OPC_SW, - TILE_OPC_SWADD, - TILE_OPC_SWINT0, - TILE_OPC_SWINT1, - TILE_OPC_SWINT2, - TILE_OPC_SWINT3, - TILE_OPC_TBLIDXB0, - TILE_OPC_TBLIDXB0_SN, - TILE_OPC_TBLIDXB1, - TILE_OPC_TBLIDXB1_SN, - TILE_OPC_TBLIDXB2, - TILE_OPC_TBLIDXB2_SN, - TILE_OPC_TBLIDXB3, - TILE_OPC_TBLIDXB3_SN, - TILE_OPC_TNS, - TILE_OPC_TNS_SN, - TILE_OPC_WH64, - TILE_OPC_XOR, - TILE_OPC_XOR_SN, - TILE_OPC_XORI, - TILE_OPC_XORI_SN, - TILE_OPC_NONE -} tile_mnemonic; - -/* 64-bit pattern for a { bpt ; nop } bundle. 
*/ -#define TILE_BPT_BUNDLE 0x400b3cae70166000ULL - - -#define TILE_ELF_MACHINE_CODE EM_TILEPRO - -#define TILE_ELF_NAME "elf32-tilepro" - - -static __inline unsigned int -get_BrOff_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3ff); -} - -static __inline unsigned int -get_BrOff_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000); -} - -static __inline unsigned int -get_BrType_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0xf); -} - -static __inline unsigned int -get_Dest_Imm8_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x0000003f) | - (((unsigned int)(n >> 43)) & 0x000000c0); -} - -static __inline unsigned int -get_Dest_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 2)) & 0x3); -} - -static __inline unsigned int -get_Dest_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3f); -} - -static __inline unsigned int -get_Dest_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x3f); -} - -static __inline unsigned int -get_Dest_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3f); -} - -static __inline unsigned int -get_Dest_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x3f); -} - -static __inline unsigned int -get_Imm16_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xffff); -} - -static __inline unsigned int -get_Imm16_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xffff); -} - -static __inline unsigned int -get_Imm8_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0xff); -} - -static __inline unsigned int -get_Imm8_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xff); -} - -static 
__inline unsigned int -get_Imm8_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xff); -} - -static __inline unsigned int -get_Imm8_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0xff); -} - -static __inline unsigned int -get_Imm8_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0xff); -} - -static __inline unsigned int -get_ImmOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 20)) & 0x7f); -} - -static __inline unsigned int -get_ImmOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 51)) & 0x7f); -} - -static __inline unsigned int -get_ImmRROpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 8)) & 0x3); -} - -static __inline unsigned int -get_JOffLong_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000) | - (((unsigned int)(n >> 14)) & 0x001e0000) | - (((unsigned int)(n >> 16)) & 0x07e00000) | - (((unsigned int)(n >> 31)) & 0x18000000); -} - -static __inline unsigned int -get_JOff_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x00007fff) | - (((unsigned int)(n >> 20)) & 0x00018000) | - (((unsigned int)(n >> 14)) & 0x001e0000) | - (((unsigned int)(n >> 16)) & 0x07e00000) | - (((unsigned int)(n >> 31)) & 0x08000000); -} - -static __inline unsigned int -get_MF_Imm15_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x00003fff) | - (((unsigned int)(n >> 44)) & 0x00004000); -} - -static __inline unsigned int -get_MMEnd_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 18)) & 0x1f); -} - -static __inline unsigned int -get_MMEnd_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x1f); -} - -static __inline unsigned int -get_MMStart_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n 
>> 23)) & 0x1f); -} - -static __inline unsigned int -get_MMStart_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 54)) & 0x1f); -} - -static __inline unsigned int -get_MT_Imm15_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 31)) & 0x0000003f) | - (((unsigned int)(n >> 37)) & 0x00003fc0) | - (((unsigned int)(n >> 44)) & 0x00004000); -} - -static __inline unsigned int -get_Mode(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 63)) & 0x1); -} - -static __inline unsigned int -get_NoRegOpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0xf); -} - -static __inline unsigned int -get_Opcode_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 10)) & 0x3f); -} - -static __inline unsigned int -get_Opcode_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 28)) & 0x7); -} - -static __inline unsigned int -get_Opcode_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 59)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 27)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 59)) & 0xf); -} - -static __inline unsigned int -get_Opcode_Y2(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 56)) & 0x7); -} - -static __inline unsigned int -get_RROpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 4)) & 0xf); -} - -static __inline unsigned int -get_RRROpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 18)) & 0x1ff); -} - -static __inline unsigned int -get_RRROpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x1ff); -} - -static __inline unsigned int -get_RRROpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned 
int)num; - return (((n >> 18)) & 0x3); -} - -static __inline unsigned int -get_RRROpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 49)) & 0x3); -} - -static __inline unsigned int -get_RouteOpcodeExtension_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3ff); -} - -static __inline unsigned int -get_S_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 27)) & 0x1); -} - -static __inline unsigned int -get_S_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 58)) & 0x1); -} - -static __inline unsigned int -get_ShAmt_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_ShAmt_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_SrcA_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 6)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 6)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 37)) & 0x3f); -} - -static __inline unsigned int -get_SrcA_Y2(tile_bundle_bits n) -{ - return (((n >> 26)) & 0x00000001) | - (((unsigned int)(n >> 50)) & 0x0000003e); -} - -static __inline unsigned int -get_SrcBDest_Y2(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 20)) & 0x3f); -} - -static __inline unsigned int 
-get_SrcB_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x3f); -} - -static __inline unsigned int -get_SrcB_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x3f); -} - -static __inline unsigned int -get_Src_SN(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 0)) & 0x3); -} - -static __inline unsigned int -get_UnOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 12)) & 0x1f); -} - -static __inline unsigned int -get_UnOpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 43)) & 0x1f); -} - -static __inline unsigned int -get_UnShOpcodeExtension_X0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 17)) & 0x3ff); -} - -static __inline unsigned int -get_UnShOpcodeExtension_X1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 48)) & 0x3ff); -} - -static __inline unsigned int -get_UnShOpcodeExtension_Y0(tile_bundle_bits num) -{ - const unsigned int n = (unsigned int)num; - return (((n >> 17)) & 0x7); -} - -static __inline unsigned int -get_UnShOpcodeExtension_Y1(tile_bundle_bits n) -{ - return (((unsigned int)(n >> 48)) & 0x7); -} - - -static __inline int -sign_extend(int n, int num_bits) -{ - int shift = (int)(sizeof(int) * 8 - num_bits); - return (n << shift) >> shift; -} - - - -static __inline tile_bundle_bits 
-create_BrOff_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 0); -} - -static __inline tile_bundle_bits -create_BrOff_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20); -} - -static __inline tile_bundle_bits -create_BrType_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 31); -} - -static __inline tile_bundle_bits -create_Dest_Imm8_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x0000003f)) << 31) | - (((tile_bundle_bits)(n & 0x000000c0)) << 43); -} - -static __inline tile_bundle_bits -create_Dest_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 2); -} - -static __inline tile_bundle_bits -create_Dest_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 0); -} - -static __inline tile_bundle_bits -create_Dest_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 31); -} - -static __inline tile_bundle_bits -create_Dest_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 0); -} - -static __inline tile_bundle_bits -create_Dest_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 31); -} - -static __inline tile_bundle_bits -create_Imm16_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xffff) << 12); -} - -static __inline tile_bundle_bits -create_Imm16_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xffff)) << 43); -} - -static __inline tile_bundle_bits -create_Imm8_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xff) << 0); -} - -static __inline tile_bundle_bits -create_Imm8_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return 
((n & 0xff) << 12); -} - -static __inline tile_bundle_bits -create_Imm8_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xff)) << 43); -} - -static __inline tile_bundle_bits -create_Imm8_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xff) << 12); -} - -static __inline tile_bundle_bits -create_Imm8_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xff)) << 43); -} - -static __inline tile_bundle_bits -create_ImmOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7f) << 20); -} - -static __inline tile_bundle_bits -create_ImmOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7f)) << 51); -} - -static __inline tile_bundle_bits -create_ImmRROpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 8); -} - -static __inline tile_bundle_bits -create_JOffLong_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20) | - (((tile_bundle_bits)(n & 0x001e0000)) << 14) | - (((tile_bundle_bits)(n & 0x07e00000)) << 16) | - (((tile_bundle_bits)(n & 0x18000000)) << 31); -} - -static __inline tile_bundle_bits -create_JOff_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00007fff)) << 43) | - (((tile_bundle_bits)(n & 0x00018000)) << 20) | - (((tile_bundle_bits)(n & 0x001e0000)) << 14) | - (((tile_bundle_bits)(n & 0x07e00000)) << 16) | - (((tile_bundle_bits)(n & 0x08000000)) << 31); -} - -static __inline tile_bundle_bits -create_MF_Imm15_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x00003fff)) << 37) | - (((tile_bundle_bits)(n & 0x00004000)) << 44); -} - -static __inline tile_bundle_bits -create_MMEnd_X0(int num) -{ - const unsigned int n 
= (unsigned int)num; - return ((n & 0x1f) << 18); -} - -static __inline tile_bundle_bits -create_MMEnd_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 49); -} - -static __inline tile_bundle_bits -create_MMStart_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 23); -} - -static __inline tile_bundle_bits -create_MMStart_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 54); -} - -static __inline tile_bundle_bits -create_MT_Imm15_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x0000003f)) << 31) | - (((tile_bundle_bits)(n & 0x00003fc0)) << 37) | - (((tile_bundle_bits)(n & 0x00004000)) << 44); -} - -static __inline tile_bundle_bits -create_Mode(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1)) << 63); -} - -static __inline tile_bundle_bits -create_NoRegOpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 0); -} - -static __inline tile_bundle_bits -create_Opcode_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 10); -} - -static __inline tile_bundle_bits -create_Opcode_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7) << 28); -} - -static __inline tile_bundle_bits -create_Opcode_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 59); -} - -static __inline tile_bundle_bits -create_Opcode_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 27); -} - -static __inline tile_bundle_bits -create_Opcode_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0xf)) << 59); -} - -static __inline tile_bundle_bits -create_Opcode_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7)) << 
56); -} - -static __inline tile_bundle_bits -create_RROpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0xf) << 4); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1ff) << 18); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1ff)) << 49); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 18); -} - -static __inline tile_bundle_bits -create_RRROpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3)) << 49); -} - -static __inline tile_bundle_bits -create_RouteOpcodeExtension_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 0); -} - -static __inline tile_bundle_bits -create_S_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1) << 27); -} - -static __inline tile_bundle_bits -create_S_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1)) << 58); -} - -static __inline tile_bundle_bits -create_ShAmt_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_ShAmt_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_ShAmt_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_ShAmt_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_SrcA_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 
6); -} - -static __inline tile_bundle_bits -create_SrcA_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 37); -} - -static __inline tile_bundle_bits -create_SrcA_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 6); -} - -static __inline tile_bundle_bits -create_SrcA_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 37); -} - -static __inline tile_bundle_bits -create_SrcA_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x00000001) << 26) | - (((tile_bundle_bits)(n & 0x0000003e)) << 50); -} - -static __inline tile_bundle_bits -create_SrcBDest_Y2(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 20); -} - -static __inline tile_bundle_bits -create_SrcB_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 12); -} - -static __inline tile_bundle_bits -create_SrcB_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 43); -} - -static __inline tile_bundle_bits -create_SrcB_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3f) << 12); -} - -static __inline tile_bundle_bits -create_SrcB_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3f)) << 43); -} - -static __inline tile_bundle_bits -create_Src_SN(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3) << 0); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return 
((n & 0x1f) << 12); -} - -static __inline tile_bundle_bits -create_UnOpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x1f)) << 43); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_X0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x3ff) << 17); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_X1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x3ff)) << 48); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_Y0(int num) -{ - const unsigned int n = (unsigned int)num; - return ((n & 0x7) << 17); -} - -static __inline tile_bundle_bits -create_UnShOpcodeExtension_Y1(int num) -{ - const unsigned int n = (unsigned int)num; - return (((tile_bundle_bits)(n & 0x7)) << 48); -} - - - -typedef enum -{ - TILE_PIPELINE_X0, - TILE_PIPELINE_X1, - TILE_PIPELINE_Y0, - TILE_PIPELINE_Y1, - TILE_PIPELINE_Y2, -} tile_pipeline; - -#define tile_is_x_pipeline(p) ((int)(p) <= (int)TILE_PIPELINE_X1) - -typedef enum -{ - TILE_OP_TYPE_REGISTER, - TILE_OP_TYPE_IMMEDIATE, - TILE_OP_TYPE_ADDRESS, - TILE_OP_TYPE_SPR -} tile_operand_type; - -/* This is the bit that determines if a bundle is in the Y encoding. */ -#define TILE_BUNDLE_Y_ENCODING_MASK ((tile_bundle_bits)1 << 63) - -enum -{ - /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ - TILE_MAX_INSTRUCTIONS_PER_BUNDLE = 3, - - /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ - TILE_NUM_PIPELINE_ENCODINGS = 5, - - /* Log base 2 of TILE_BUNDLE_SIZE_IN_BYTES. */ - TILE_LOG2_BUNDLE_SIZE_IN_BYTES = 3, - - /* Instructions take this many bytes. */ - TILE_BUNDLE_SIZE_IN_BYTES = 1 << TILE_LOG2_BUNDLE_SIZE_IN_BYTES, - - /* Log base 2 of TILE_BUNDLE_ALIGNMENT_IN_BYTES. */ - TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, - - /* Bundles should be aligned modulo this number of bytes. 
*/ - TILE_BUNDLE_ALIGNMENT_IN_BYTES = - (1 << TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), - - /* Log base 2 of TILE_SN_INSTRUCTION_SIZE_IN_BYTES. */ - TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1, - - /* Static network instructions take this many bytes. */ - TILE_SN_INSTRUCTION_SIZE_IN_BYTES = - (1 << TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES), - - /* Number of registers (some are magic, such as network I/O). */ - TILE_NUM_REGISTERS = 64, - - /* Number of static network registers. */ - TILE_NUM_SN_REGISTERS = 4 -}; - - -struct tile_operand -{ - /* Is this operand a register, immediate or address? */ - tile_operand_type type; - - /* The default relocation type for this operand. */ - signed int default_reloc : 16; - - /* How many bits is this value? (used for range checking) */ - unsigned int num_bits : 5; - - /* Is the value signed? (used for range checking) */ - unsigned int is_signed : 1; - - /* Is this operand a source register? */ - unsigned int is_src_reg : 1; - - /* Is this operand written? (i.e. is it a destination register) */ - unsigned int is_dest_reg : 1; - - /* Is this operand PC-relative? */ - unsigned int is_pc_relative : 1; - - /* By how many bits do we right shift the value before inserting? */ - unsigned int rightshift : 2; - - /* Return the bits for this operand to be ORed into an existing bundle. */ - tile_bundle_bits (*insert) (int op); - - /* Extract this operand and return it. */ - unsigned int (*extract) (tile_bundle_bits bundle); -}; - - -extern const struct tile_operand tile_operands[]; - -/* One finite-state machine per pipe for rapid instruction decoding. */ -extern const unsigned short * const -tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS]; - - -struct tile_opcode -{ - /* The opcode mnemonic, e.g. "add" */ - const char *name; - - /* The enum value for this mnemonic. 
*/ - tile_mnemonic mnemonic; - - /* A bit mask of which of the five pipes this instruction - is compatible with: - X0 0x01 - X1 0x02 - Y0 0x04 - Y1 0x08 - Y2 0x10 */ - unsigned char pipes; - - /* How many operands are there? */ - unsigned char num_operands; - - /* Which register does this write implicitly, or TREG_ZERO if none? */ - unsigned char implicitly_written_register; - - /* Can this be bundled with other instructions (almost always true). */ - unsigned char can_bundle; - - /* The description of the operands. Each of these is an - * index into the tile_operands[] table. */ - unsigned char operands[TILE_NUM_PIPELINE_ENCODINGS][TILE_MAX_OPERANDS]; - -}; - -extern const struct tile_opcode tile_opcodes[]; - - -/* Used for non-textual disassembly into structs. */ -struct tile_decoded_instruction -{ - const struct tile_opcode *opcode; - const struct tile_operand *operands[TILE_MAX_OPERANDS]; - int operand_values[TILE_MAX_OPERANDS]; -}; - - -/* Disassemble a bundle into a struct for machine processing. */ -extern int parse_insn_tile(tile_bundle_bits bits, - unsigned int pc, - struct tile_decoded_instruction - decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]); - - - -#endif /* opcode_tile_h */ diff --git a/arch/tile/include/asm/opcode_constants_32.h b/arch/tile/include/asm/opcode_constants_32.h deleted file mode 100644 index 227d033b180..00000000000 --- a/arch/tile/include/asm/opcode_constants_32.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- */ - -/* This file is machine-generated; DO NOT EDIT! */ - - -#ifndef _TILE_OPCODE_CONSTANTS_H -#define _TILE_OPCODE_CONSTANTS_H -enum -{ - ADDBS_U_SPECIAL_0_OPCODE_X0 = 98, - ADDBS_U_SPECIAL_0_OPCODE_X1 = 68, - ADDB_SPECIAL_0_OPCODE_X0 = 1, - ADDB_SPECIAL_0_OPCODE_X1 = 1, - ADDHS_SPECIAL_0_OPCODE_X0 = 99, - ADDHS_SPECIAL_0_OPCODE_X1 = 69, - ADDH_SPECIAL_0_OPCODE_X0 = 2, - ADDH_SPECIAL_0_OPCODE_X1 = 2, - ADDIB_IMM_0_OPCODE_X0 = 1, - ADDIB_IMM_0_OPCODE_X1 = 1, - ADDIH_IMM_0_OPCODE_X0 = 2, - ADDIH_IMM_0_OPCODE_X1 = 2, - ADDI_IMM_0_OPCODE_X0 = 3, - ADDI_IMM_0_OPCODE_X1 = 3, - ADDI_IMM_1_OPCODE_SN = 1, - ADDI_OPCODE_Y0 = 9, - ADDI_OPCODE_Y1 = 7, - ADDLIS_OPCODE_X0 = 1, - ADDLIS_OPCODE_X1 = 2, - ADDLI_OPCODE_X0 = 2, - ADDLI_OPCODE_X1 = 3, - ADDS_SPECIAL_0_OPCODE_X0 = 96, - ADDS_SPECIAL_0_OPCODE_X1 = 66, - ADD_SPECIAL_0_OPCODE_X0 = 3, - ADD_SPECIAL_0_OPCODE_X1 = 3, - ADD_SPECIAL_0_OPCODE_Y0 = 0, - ADD_SPECIAL_0_OPCODE_Y1 = 0, - ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4, - ADIFFH_SPECIAL_0_OPCODE_X0 = 5, - ANDI_IMM_0_OPCODE_X0 = 1, - ANDI_IMM_0_OPCODE_X1 = 4, - ANDI_OPCODE_Y0 = 10, - ANDI_OPCODE_Y1 = 8, - AND_SPECIAL_0_OPCODE_X0 = 6, - AND_SPECIAL_0_OPCODE_X1 = 4, - AND_SPECIAL_2_OPCODE_Y0 = 0, - AND_SPECIAL_2_OPCODE_Y1 = 0, - AULI_OPCODE_X0 = 3, - AULI_OPCODE_X1 = 4, - AVGB_U_SPECIAL_0_OPCODE_X0 = 7, - AVGH_SPECIAL_0_OPCODE_X0 = 8, - BBNST_BRANCH_OPCODE_X1 = 15, - BBNS_BRANCH_OPCODE_X1 = 14, - BBNS_OPCODE_SN = 63, - BBST_BRANCH_OPCODE_X1 = 13, - BBS_BRANCH_OPCODE_X1 = 12, - BBS_OPCODE_SN = 62, - BGEZT_BRANCH_OPCODE_X1 = 7, - BGEZ_BRANCH_OPCODE_X1 = 6, - BGEZ_OPCODE_SN = 61, - BGZT_BRANCH_OPCODE_X1 = 5, - BGZ_BRANCH_OPCODE_X1 = 4, - BGZ_OPCODE_SN = 58, - BITX_UN_0_SHUN_0_OPCODE_X0 = 1, - BITX_UN_0_SHUN_0_OPCODE_Y0 = 1, - BLEZT_BRANCH_OPCODE_X1 = 11, - BLEZ_BRANCH_OPCODE_X1 = 10, - BLEZ_OPCODE_SN = 59, - BLZT_BRANCH_OPCODE_X1 = 9, - BLZ_BRANCH_OPCODE_X1 = 8, - BLZ_OPCODE_SN = 60, - BNZT_BRANCH_OPCODE_X1 = 3, - BNZ_BRANCH_OPCODE_X1 = 2, - BNZ_OPCODE_SN = 57, - 
BPT_NOREG_RR_IMM_0_OPCODE_SN = 1, - BRANCH_OPCODE_X1 = 5, - BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2, - BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2, - BZT_BRANCH_OPCODE_X1 = 1, - BZ_BRANCH_OPCODE_X1 = 0, - BZ_OPCODE_SN = 56, - CLZ_UN_0_SHUN_0_OPCODE_X0 = 3, - CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3, - CRC32_32_SPECIAL_0_OPCODE_X0 = 9, - CRC32_8_SPECIAL_0_OPCODE_X0 = 10, - CTZ_UN_0_SHUN_0_OPCODE_X0 = 4, - CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4, - DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1, - DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2, - DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95, - FINV_UN_0_SHUN_0_OPCODE_X1 = 3, - FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4, - FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3, - FNOP_UN_0_SHUN_0_OPCODE_X0 = 5, - FNOP_UN_0_SHUN_0_OPCODE_X1 = 5, - FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5, - FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1, - HALT_NOREG_RR_IMM_0_OPCODE_SN = 0, - ICOH_UN_0_SHUN_0_OPCODE_X1 = 6, - ILL_UN_0_SHUN_0_OPCODE_X1 = 7, - ILL_UN_0_SHUN_0_OPCODE_Y1 = 2, - IMM_0_OPCODE_SN = 0, - IMM_0_OPCODE_X0 = 4, - IMM_0_OPCODE_X1 = 6, - IMM_1_OPCODE_SN = 1, - IMM_OPCODE_0_X0 = 5, - INTHB_SPECIAL_0_OPCODE_X0 = 11, - INTHB_SPECIAL_0_OPCODE_X1 = 5, - INTHH_SPECIAL_0_OPCODE_X0 = 12, - INTHH_SPECIAL_0_OPCODE_X1 = 6, - INTLB_SPECIAL_0_OPCODE_X0 = 13, - INTLB_SPECIAL_0_OPCODE_X1 = 7, - INTLH_SPECIAL_0_OPCODE_X0 = 14, - INTLH_SPECIAL_0_OPCODE_X1 = 8, - INV_UN_0_SHUN_0_OPCODE_X1 = 8, - IRET_UN_0_SHUN_0_OPCODE_X1 = 9, - JALB_OPCODE_X1 = 13, - JALF_OPCODE_X1 = 12, - JALRP_SPECIAL_0_OPCODE_X1 = 9, - JALRR_IMM_1_OPCODE_SN = 3, - JALR_RR_IMM_0_OPCODE_SN = 5, - JALR_SPECIAL_0_OPCODE_X1 = 10, - JB_OPCODE_X1 = 11, - JF_OPCODE_X1 = 10, - JRP_SPECIAL_0_OPCODE_X1 = 11, - JRR_IMM_1_OPCODE_SN = 2, - JR_RR_IMM_0_OPCODE_SN = 4, - JR_SPECIAL_0_OPCODE_X1 = 12, - LBADD_IMM_0_OPCODE_X1 = 22, - LBADD_U_IMM_0_OPCODE_X1 = 23, - LB_OPCODE_Y2 = 0, - LB_UN_0_SHUN_0_OPCODE_X1 = 10, - LB_U_OPCODE_Y2 = 1, - LB_U_UN_0_SHUN_0_OPCODE_X1 = 11, - LHADD_IMM_0_OPCODE_X1 = 24, - LHADD_U_IMM_0_OPCODE_X1 = 25, - LH_OPCODE_Y2 = 2, - LH_UN_0_SHUN_0_OPCODE_X1 = 12, - LH_U_OPCODE_Y2 = 3, - 
LH_U_UN_0_SHUN_0_OPCODE_X1 = 13, - LNK_SPECIAL_0_OPCODE_X1 = 13, - LWADD_IMM_0_OPCODE_X1 = 26, - LWADD_NA_IMM_0_OPCODE_X1 = 27, - LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24, - LW_OPCODE_Y2 = 4, - LW_UN_0_SHUN_0_OPCODE_X1 = 14, - MAXB_U_SPECIAL_0_OPCODE_X0 = 15, - MAXB_U_SPECIAL_0_OPCODE_X1 = 14, - MAXH_SPECIAL_0_OPCODE_X0 = 16, - MAXH_SPECIAL_0_OPCODE_X1 = 15, - MAXIB_U_IMM_0_OPCODE_X0 = 4, - MAXIB_U_IMM_0_OPCODE_X1 = 5, - MAXIH_IMM_0_OPCODE_X0 = 5, - MAXIH_IMM_0_OPCODE_X1 = 6, - MFSPR_IMM_0_OPCODE_X1 = 7, - MF_UN_0_SHUN_0_OPCODE_X1 = 15, - MINB_U_SPECIAL_0_OPCODE_X0 = 17, - MINB_U_SPECIAL_0_OPCODE_X1 = 16, - MINH_SPECIAL_0_OPCODE_X0 = 18, - MINH_SPECIAL_0_OPCODE_X1 = 17, - MINIB_U_IMM_0_OPCODE_X0 = 6, - MINIB_U_IMM_0_OPCODE_X1 = 8, - MINIH_IMM_0_OPCODE_X0 = 7, - MINIH_IMM_0_OPCODE_X1 = 9, - MM_OPCODE_X0 = 6, - MM_OPCODE_X1 = 7, - MNZB_SPECIAL_0_OPCODE_X0 = 19, - MNZB_SPECIAL_0_OPCODE_X1 = 18, - MNZH_SPECIAL_0_OPCODE_X0 = 20, - MNZH_SPECIAL_0_OPCODE_X1 = 19, - MNZ_SPECIAL_0_OPCODE_X0 = 21, - MNZ_SPECIAL_0_OPCODE_X1 = 20, - MNZ_SPECIAL_1_OPCODE_Y0 = 0, - MNZ_SPECIAL_1_OPCODE_Y1 = 1, - MOVEI_IMM_1_OPCODE_SN = 0, - MOVE_RR_IMM_0_OPCODE_SN = 8, - MTSPR_IMM_0_OPCODE_X1 = 10, - MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22, - MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0, - MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23, - MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24, - MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1, - MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25, - MULHH_SS_SPECIAL_0_OPCODE_X0 = 26, - MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0, - MULHH_SU_SPECIAL_0_OPCODE_X0 = 27, - MULHH_UU_SPECIAL_0_OPCODE_X0 = 28, - MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1, - MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29, - MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30, - MULHLA_US_SPECIAL_0_OPCODE_X0 = 31, - MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32, - MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33, - MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0, - MULHL_SS_SPECIAL_0_OPCODE_X0 = 34, - MULHL_SU_SPECIAL_0_OPCODE_X0 = 35, - MULHL_US_SPECIAL_0_OPCODE_X0 = 36, - MULHL_UU_SPECIAL_0_OPCODE_X0 = 37, - MULLLA_SS_SPECIAL_0_OPCODE_X0 = 
38, - MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2, - MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39, - MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40, - MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3, - MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41, - MULLL_SS_SPECIAL_0_OPCODE_X0 = 42, - MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2, - MULLL_SU_SPECIAL_0_OPCODE_X0 = 43, - MULLL_UU_SPECIAL_0_OPCODE_X0 = 44, - MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3, - MVNZ_SPECIAL_0_OPCODE_X0 = 45, - MVNZ_SPECIAL_1_OPCODE_Y0 = 1, - MVZ_SPECIAL_0_OPCODE_X0 = 46, - MVZ_SPECIAL_1_OPCODE_Y0 = 2, - MZB_SPECIAL_0_OPCODE_X0 = 47, - MZB_SPECIAL_0_OPCODE_X1 = 21, - MZH_SPECIAL_0_OPCODE_X0 = 48, - MZH_SPECIAL_0_OPCODE_X1 = 22, - MZ_SPECIAL_0_OPCODE_X0 = 49, - MZ_SPECIAL_0_OPCODE_X1 = 23, - MZ_SPECIAL_1_OPCODE_Y0 = 3, - MZ_SPECIAL_1_OPCODE_Y1 = 2, - NAP_UN_0_SHUN_0_OPCODE_X1 = 16, - NOP_NOREG_RR_IMM_0_OPCODE_SN = 2, - NOP_UN_0_SHUN_0_OPCODE_X0 = 6, - NOP_UN_0_SHUN_0_OPCODE_X1 = 17, - NOP_UN_0_SHUN_0_OPCODE_Y0 = 6, - NOP_UN_0_SHUN_0_OPCODE_Y1 = 3, - NOREG_RR_IMM_0_OPCODE_SN = 0, - NOR_SPECIAL_0_OPCODE_X0 = 50, - NOR_SPECIAL_0_OPCODE_X1 = 24, - NOR_SPECIAL_2_OPCODE_Y0 = 1, - NOR_SPECIAL_2_OPCODE_Y1 = 1, - ORI_IMM_0_OPCODE_X0 = 8, - ORI_IMM_0_OPCODE_X1 = 11, - ORI_OPCODE_Y0 = 11, - ORI_OPCODE_Y1 = 9, - OR_SPECIAL_0_OPCODE_X0 = 51, - OR_SPECIAL_0_OPCODE_X1 = 25, - OR_SPECIAL_2_OPCODE_Y0 = 2, - OR_SPECIAL_2_OPCODE_Y1 = 2, - PACKBS_U_SPECIAL_0_OPCODE_X0 = 103, - PACKBS_U_SPECIAL_0_OPCODE_X1 = 73, - PACKHB_SPECIAL_0_OPCODE_X0 = 52, - PACKHB_SPECIAL_0_OPCODE_X1 = 26, - PACKHS_SPECIAL_0_OPCODE_X0 = 102, - PACKHS_SPECIAL_0_OPCODE_X1 = 72, - PACKLB_SPECIAL_0_OPCODE_X0 = 53, - PACKLB_SPECIAL_0_OPCODE_X1 = 27, - PCNT_UN_0_SHUN_0_OPCODE_X0 = 7, - PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7, - RLI_SHUN_0_OPCODE_X0 = 1, - RLI_SHUN_0_OPCODE_X1 = 1, - RLI_SHUN_0_OPCODE_Y0 = 1, - RLI_SHUN_0_OPCODE_Y1 = 1, - RL_SPECIAL_0_OPCODE_X0 = 54, - RL_SPECIAL_0_OPCODE_X1 = 28, - RL_SPECIAL_3_OPCODE_Y0 = 0, - RL_SPECIAL_3_OPCODE_Y1 = 0, - RR_IMM_0_OPCODE_SN = 0, - S1A_SPECIAL_0_OPCODE_X0 = 55, - 
S1A_SPECIAL_0_OPCODE_X1 = 29, - S1A_SPECIAL_0_OPCODE_Y0 = 1, - S1A_SPECIAL_0_OPCODE_Y1 = 1, - S2A_SPECIAL_0_OPCODE_X0 = 56, - S2A_SPECIAL_0_OPCODE_X1 = 30, - S2A_SPECIAL_0_OPCODE_Y0 = 2, - S2A_SPECIAL_0_OPCODE_Y1 = 2, - S3A_SPECIAL_0_OPCODE_X0 = 57, - S3A_SPECIAL_0_OPCODE_X1 = 31, - S3A_SPECIAL_5_OPCODE_Y0 = 1, - S3A_SPECIAL_5_OPCODE_Y1 = 1, - SADAB_U_SPECIAL_0_OPCODE_X0 = 58, - SADAH_SPECIAL_0_OPCODE_X0 = 59, - SADAH_U_SPECIAL_0_OPCODE_X0 = 60, - SADB_U_SPECIAL_0_OPCODE_X0 = 61, - SADH_SPECIAL_0_OPCODE_X0 = 62, - SADH_U_SPECIAL_0_OPCODE_X0 = 63, - SBADD_IMM_0_OPCODE_X1 = 28, - SB_OPCODE_Y2 = 5, - SB_SPECIAL_0_OPCODE_X1 = 32, - SEQB_SPECIAL_0_OPCODE_X0 = 64, - SEQB_SPECIAL_0_OPCODE_X1 = 33, - SEQH_SPECIAL_0_OPCODE_X0 = 65, - SEQH_SPECIAL_0_OPCODE_X1 = 34, - SEQIB_IMM_0_OPCODE_X0 = 9, - SEQIB_IMM_0_OPCODE_X1 = 12, - SEQIH_IMM_0_OPCODE_X0 = 10, - SEQIH_IMM_0_OPCODE_X1 = 13, - SEQI_IMM_0_OPCODE_X0 = 11, - SEQI_IMM_0_OPCODE_X1 = 14, - SEQI_OPCODE_Y0 = 12, - SEQI_OPCODE_Y1 = 10, - SEQ_SPECIAL_0_OPCODE_X0 = 66, - SEQ_SPECIAL_0_OPCODE_X1 = 35, - SEQ_SPECIAL_5_OPCODE_Y0 = 2, - SEQ_SPECIAL_5_OPCODE_Y1 = 2, - SHADD_IMM_0_OPCODE_X1 = 29, - SHL8II_IMM_0_OPCODE_SN = 3, - SHLB_SPECIAL_0_OPCODE_X0 = 67, - SHLB_SPECIAL_0_OPCODE_X1 = 36, - SHLH_SPECIAL_0_OPCODE_X0 = 68, - SHLH_SPECIAL_0_OPCODE_X1 = 37, - SHLIB_SHUN_0_OPCODE_X0 = 2, - SHLIB_SHUN_0_OPCODE_X1 = 2, - SHLIH_SHUN_0_OPCODE_X0 = 3, - SHLIH_SHUN_0_OPCODE_X1 = 3, - SHLI_SHUN_0_OPCODE_X0 = 4, - SHLI_SHUN_0_OPCODE_X1 = 4, - SHLI_SHUN_0_OPCODE_Y0 = 2, - SHLI_SHUN_0_OPCODE_Y1 = 2, - SHL_SPECIAL_0_OPCODE_X0 = 69, - SHL_SPECIAL_0_OPCODE_X1 = 38, - SHL_SPECIAL_3_OPCODE_Y0 = 1, - SHL_SPECIAL_3_OPCODE_Y1 = 1, - SHR1_RR_IMM_0_OPCODE_SN = 9, - SHRB_SPECIAL_0_OPCODE_X0 = 70, - SHRB_SPECIAL_0_OPCODE_X1 = 39, - SHRH_SPECIAL_0_OPCODE_X0 = 71, - SHRH_SPECIAL_0_OPCODE_X1 = 40, - SHRIB_SHUN_0_OPCODE_X0 = 5, - SHRIB_SHUN_0_OPCODE_X1 = 5, - SHRIH_SHUN_0_OPCODE_X0 = 6, - SHRIH_SHUN_0_OPCODE_X1 = 6, - SHRI_SHUN_0_OPCODE_X0 = 7, - 
SHRI_SHUN_0_OPCODE_X1 = 7, - SHRI_SHUN_0_OPCODE_Y0 = 3, - SHRI_SHUN_0_OPCODE_Y1 = 3, - SHR_SPECIAL_0_OPCODE_X0 = 72, - SHR_SPECIAL_0_OPCODE_X1 = 41, - SHR_SPECIAL_3_OPCODE_Y0 = 2, - SHR_SPECIAL_3_OPCODE_Y1 = 2, - SHUN_0_OPCODE_X0 = 7, - SHUN_0_OPCODE_X1 = 8, - SHUN_0_OPCODE_Y0 = 13, - SHUN_0_OPCODE_Y1 = 11, - SH_OPCODE_Y2 = 6, - SH_SPECIAL_0_OPCODE_X1 = 42, - SLTB_SPECIAL_0_OPCODE_X0 = 73, - SLTB_SPECIAL_0_OPCODE_X1 = 43, - SLTB_U_SPECIAL_0_OPCODE_X0 = 74, - SLTB_U_SPECIAL_0_OPCODE_X1 = 44, - SLTEB_SPECIAL_0_OPCODE_X0 = 75, - SLTEB_SPECIAL_0_OPCODE_X1 = 45, - SLTEB_U_SPECIAL_0_OPCODE_X0 = 76, - SLTEB_U_SPECIAL_0_OPCODE_X1 = 46, - SLTEH_SPECIAL_0_OPCODE_X0 = 77, - SLTEH_SPECIAL_0_OPCODE_X1 = 47, - SLTEH_U_SPECIAL_0_OPCODE_X0 = 78, - SLTEH_U_SPECIAL_0_OPCODE_X1 = 48, - SLTE_SPECIAL_0_OPCODE_X0 = 79, - SLTE_SPECIAL_0_OPCODE_X1 = 49, - SLTE_SPECIAL_4_OPCODE_Y0 = 0, - SLTE_SPECIAL_4_OPCODE_Y1 = 0, - SLTE_U_SPECIAL_0_OPCODE_X0 = 80, - SLTE_U_SPECIAL_0_OPCODE_X1 = 50, - SLTE_U_SPECIAL_4_OPCODE_Y0 = 1, - SLTE_U_SPECIAL_4_OPCODE_Y1 = 1, - SLTH_SPECIAL_0_OPCODE_X0 = 81, - SLTH_SPECIAL_0_OPCODE_X1 = 51, - SLTH_U_SPECIAL_0_OPCODE_X0 = 82, - SLTH_U_SPECIAL_0_OPCODE_X1 = 52, - SLTIB_IMM_0_OPCODE_X0 = 12, - SLTIB_IMM_0_OPCODE_X1 = 15, - SLTIB_U_IMM_0_OPCODE_X0 = 13, - SLTIB_U_IMM_0_OPCODE_X1 = 16, - SLTIH_IMM_0_OPCODE_X0 = 14, - SLTIH_IMM_0_OPCODE_X1 = 17, - SLTIH_U_IMM_0_OPCODE_X0 = 15, - SLTIH_U_IMM_0_OPCODE_X1 = 18, - SLTI_IMM_0_OPCODE_X0 = 16, - SLTI_IMM_0_OPCODE_X1 = 19, - SLTI_OPCODE_Y0 = 14, - SLTI_OPCODE_Y1 = 12, - SLTI_U_IMM_0_OPCODE_X0 = 17, - SLTI_U_IMM_0_OPCODE_X1 = 20, - SLTI_U_OPCODE_Y0 = 15, - SLTI_U_OPCODE_Y1 = 13, - SLT_SPECIAL_0_OPCODE_X0 = 83, - SLT_SPECIAL_0_OPCODE_X1 = 53, - SLT_SPECIAL_4_OPCODE_Y0 = 2, - SLT_SPECIAL_4_OPCODE_Y1 = 2, - SLT_U_SPECIAL_0_OPCODE_X0 = 84, - SLT_U_SPECIAL_0_OPCODE_X1 = 54, - SLT_U_SPECIAL_4_OPCODE_Y0 = 3, - SLT_U_SPECIAL_4_OPCODE_Y1 = 3, - SNEB_SPECIAL_0_OPCODE_X0 = 85, - SNEB_SPECIAL_0_OPCODE_X1 = 55, - SNEH_SPECIAL_0_OPCODE_X0 = 
86, - SNEH_SPECIAL_0_OPCODE_X1 = 56, - SNE_SPECIAL_0_OPCODE_X0 = 87, - SNE_SPECIAL_0_OPCODE_X1 = 57, - SNE_SPECIAL_5_OPCODE_Y0 = 3, - SNE_SPECIAL_5_OPCODE_Y1 = 3, - SPECIAL_0_OPCODE_X0 = 0, - SPECIAL_0_OPCODE_X1 = 1, - SPECIAL_0_OPCODE_Y0 = 1, - SPECIAL_0_OPCODE_Y1 = 1, - SPECIAL_1_OPCODE_Y0 = 2, - SPECIAL_1_OPCODE_Y1 = 2, - SPECIAL_2_OPCODE_Y0 = 3, - SPECIAL_2_OPCODE_Y1 = 3, - SPECIAL_3_OPCODE_Y0 = 4, - SPECIAL_3_OPCODE_Y1 = 4, - SPECIAL_4_OPCODE_Y0 = 5, - SPECIAL_4_OPCODE_Y1 = 5, - SPECIAL_5_OPCODE_Y0 = 6, - SPECIAL_5_OPCODE_Y1 = 6, - SPECIAL_6_OPCODE_Y0 = 7, - SPECIAL_7_OPCODE_Y0 = 8, - SRAB_SPECIAL_0_OPCODE_X0 = 88, - SRAB_SPECIAL_0_OPCODE_X1 = 58, - SRAH_SPECIAL_0_OPCODE_X0 = 89, - SRAH_SPECIAL_0_OPCODE_X1 = 59, - SRAIB_SHUN_0_OPCODE_X0 = 8, - SRAIB_SHUN_0_OPCODE_X1 = 8, - SRAIH_SHUN_0_OPCODE_X0 = 9, - SRAIH_SHUN_0_OPCODE_X1 = 9, - SRAI_SHUN_0_OPCODE_X0 = 10, - SRAI_SHUN_0_OPCODE_X1 = 10, - SRAI_SHUN_0_OPCODE_Y0 = 4, - SRAI_SHUN_0_OPCODE_Y1 = 4, - SRA_SPECIAL_0_OPCODE_X0 = 90, - SRA_SPECIAL_0_OPCODE_X1 = 60, - SRA_SPECIAL_3_OPCODE_Y0 = 3, - SRA_SPECIAL_3_OPCODE_Y1 = 3, - SUBBS_U_SPECIAL_0_OPCODE_X0 = 100, - SUBBS_U_SPECIAL_0_OPCODE_X1 = 70, - SUBB_SPECIAL_0_OPCODE_X0 = 91, - SUBB_SPECIAL_0_OPCODE_X1 = 61, - SUBHS_SPECIAL_0_OPCODE_X0 = 101, - SUBHS_SPECIAL_0_OPCODE_X1 = 71, - SUBH_SPECIAL_0_OPCODE_X0 = 92, - SUBH_SPECIAL_0_OPCODE_X1 = 62, - SUBS_SPECIAL_0_OPCODE_X0 = 97, - SUBS_SPECIAL_0_OPCODE_X1 = 67, - SUB_SPECIAL_0_OPCODE_X0 = 93, - SUB_SPECIAL_0_OPCODE_X1 = 63, - SUB_SPECIAL_0_OPCODE_Y0 = 3, - SUB_SPECIAL_0_OPCODE_Y1 = 3, - SWADD_IMM_0_OPCODE_X1 = 30, - SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18, - SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19, - SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20, - SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21, - SW_OPCODE_Y2 = 7, - SW_SPECIAL_0_OPCODE_X1 = 64, - TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8, - TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8, - TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9, - TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9, - TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10, - 
TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10, - TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11, - TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11, - TNS_UN_0_SHUN_0_OPCODE_X1 = 22, - UN_0_SHUN_0_OPCODE_X0 = 11, - UN_0_SHUN_0_OPCODE_X1 = 11, - UN_0_SHUN_0_OPCODE_Y0 = 5, - UN_0_SHUN_0_OPCODE_Y1 = 5, - WH64_UN_0_SHUN_0_OPCODE_X1 = 23, - XORI_IMM_0_OPCODE_X0 = 2, - XORI_IMM_0_OPCODE_X1 = 21, - XOR_SPECIAL_0_OPCODE_X0 = 94, - XOR_SPECIAL_0_OPCODE_X1 = 65, - XOR_SPECIAL_2_OPCODE_Y0 = 3, - XOR_SPECIAL_2_OPCODE_Y1 = 3 -}; - -#endif /* !_TILE_OPCODE_CONSTANTS_H */ diff --git a/arch/tile/include/asm/opcode_constants_64.h b/arch/tile/include/asm/opcode_constants_64.h deleted file mode 100644 index 227d033b180..00000000000 --- a/arch/tile/include/asm/opcode_constants_64.h +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -/* This file is machine-generated; DO NOT EDIT! 
*/ - - -#ifndef _TILE_OPCODE_CONSTANTS_H -#define _TILE_OPCODE_CONSTANTS_H -enum -{ - ADDBS_U_SPECIAL_0_OPCODE_X0 = 98, - ADDBS_U_SPECIAL_0_OPCODE_X1 = 68, - ADDB_SPECIAL_0_OPCODE_X0 = 1, - ADDB_SPECIAL_0_OPCODE_X1 = 1, - ADDHS_SPECIAL_0_OPCODE_X0 = 99, - ADDHS_SPECIAL_0_OPCODE_X1 = 69, - ADDH_SPECIAL_0_OPCODE_X0 = 2, - ADDH_SPECIAL_0_OPCODE_X1 = 2, - ADDIB_IMM_0_OPCODE_X0 = 1, - ADDIB_IMM_0_OPCODE_X1 = 1, - ADDIH_IMM_0_OPCODE_X0 = 2, - ADDIH_IMM_0_OPCODE_X1 = 2, - ADDI_IMM_0_OPCODE_X0 = 3, - ADDI_IMM_0_OPCODE_X1 = 3, - ADDI_IMM_1_OPCODE_SN = 1, - ADDI_OPCODE_Y0 = 9, - ADDI_OPCODE_Y1 = 7, - ADDLIS_OPCODE_X0 = 1, - ADDLIS_OPCODE_X1 = 2, - ADDLI_OPCODE_X0 = 2, - ADDLI_OPCODE_X1 = 3, - ADDS_SPECIAL_0_OPCODE_X0 = 96, - ADDS_SPECIAL_0_OPCODE_X1 = 66, - ADD_SPECIAL_0_OPCODE_X0 = 3, - ADD_SPECIAL_0_OPCODE_X1 = 3, - ADD_SPECIAL_0_OPCODE_Y0 = 0, - ADD_SPECIAL_0_OPCODE_Y1 = 0, - ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4, - ADIFFH_SPECIAL_0_OPCODE_X0 = 5, - ANDI_IMM_0_OPCODE_X0 = 1, - ANDI_IMM_0_OPCODE_X1 = 4, - ANDI_OPCODE_Y0 = 10, - ANDI_OPCODE_Y1 = 8, - AND_SPECIAL_0_OPCODE_X0 = 6, - AND_SPECIAL_0_OPCODE_X1 = 4, - AND_SPECIAL_2_OPCODE_Y0 = 0, - AND_SPECIAL_2_OPCODE_Y1 = 0, - AULI_OPCODE_X0 = 3, - AULI_OPCODE_X1 = 4, - AVGB_U_SPECIAL_0_OPCODE_X0 = 7, - AVGH_SPECIAL_0_OPCODE_X0 = 8, - BBNST_BRANCH_OPCODE_X1 = 15, - BBNS_BRANCH_OPCODE_X1 = 14, - BBNS_OPCODE_SN = 63, - BBST_BRANCH_OPCODE_X1 = 13, - BBS_BRANCH_OPCODE_X1 = 12, - BBS_OPCODE_SN = 62, - BGEZT_BRANCH_OPCODE_X1 = 7, - BGEZ_BRANCH_OPCODE_X1 = 6, - BGEZ_OPCODE_SN = 61, - BGZT_BRANCH_OPCODE_X1 = 5, - BGZ_BRANCH_OPCODE_X1 = 4, - BGZ_OPCODE_SN = 58, - BITX_UN_0_SHUN_0_OPCODE_X0 = 1, - BITX_UN_0_SHUN_0_OPCODE_Y0 = 1, - BLEZT_BRANCH_OPCODE_X1 = 11, - BLEZ_BRANCH_OPCODE_X1 = 10, - BLEZ_OPCODE_SN = 59, - BLZT_BRANCH_OPCODE_X1 = 9, - BLZ_BRANCH_OPCODE_X1 = 8, - BLZ_OPCODE_SN = 60, - BNZT_BRANCH_OPCODE_X1 = 3, - BNZ_BRANCH_OPCODE_X1 = 2, - BNZ_OPCODE_SN = 57, - BPT_NOREG_RR_IMM_0_OPCODE_SN = 1, - BRANCH_OPCODE_X1 = 5, - 
BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2, - BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2, - BZT_BRANCH_OPCODE_X1 = 1, - BZ_BRANCH_OPCODE_X1 = 0, - BZ_OPCODE_SN = 56, - CLZ_UN_0_SHUN_0_OPCODE_X0 = 3, - CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3, - CRC32_32_SPECIAL_0_OPCODE_X0 = 9, - CRC32_8_SPECIAL_0_OPCODE_X0 = 10, - CTZ_UN_0_SHUN_0_OPCODE_X0 = 4, - CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4, - DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1, - DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2, - DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95, - FINV_UN_0_SHUN_0_OPCODE_X1 = 3, - FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4, - FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3, - FNOP_UN_0_SHUN_0_OPCODE_X0 = 5, - FNOP_UN_0_SHUN_0_OPCODE_X1 = 5, - FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5, - FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1, - HALT_NOREG_RR_IMM_0_OPCODE_SN = 0, - ICOH_UN_0_SHUN_0_OPCODE_X1 = 6, - ILL_UN_0_SHUN_0_OPCODE_X1 = 7, - ILL_UN_0_SHUN_0_OPCODE_Y1 = 2, - IMM_0_OPCODE_SN = 0, - IMM_0_OPCODE_X0 = 4, - IMM_0_OPCODE_X1 = 6, - IMM_1_OPCODE_SN = 1, - IMM_OPCODE_0_X0 = 5, - INTHB_SPECIAL_0_OPCODE_X0 = 11, - INTHB_SPECIAL_0_OPCODE_X1 = 5, - INTHH_SPECIAL_0_OPCODE_X0 = 12, - INTHH_SPECIAL_0_OPCODE_X1 = 6, - INTLB_SPECIAL_0_OPCODE_X0 = 13, - INTLB_SPECIAL_0_OPCODE_X1 = 7, - INTLH_SPECIAL_0_OPCODE_X0 = 14, - INTLH_SPECIAL_0_OPCODE_X1 = 8, - INV_UN_0_SHUN_0_OPCODE_X1 = 8, - IRET_UN_0_SHUN_0_OPCODE_X1 = 9, - JALB_OPCODE_X1 = 13, - JALF_OPCODE_X1 = 12, - JALRP_SPECIAL_0_OPCODE_X1 = 9, - JALRR_IMM_1_OPCODE_SN = 3, - JALR_RR_IMM_0_OPCODE_SN = 5, - JALR_SPECIAL_0_OPCODE_X1 = 10, - JB_OPCODE_X1 = 11, - JF_OPCODE_X1 = 10, - JRP_SPECIAL_0_OPCODE_X1 = 11, - JRR_IMM_1_OPCODE_SN = 2, - JR_RR_IMM_0_OPCODE_SN = 4, - JR_SPECIAL_0_OPCODE_X1 = 12, - LBADD_IMM_0_OPCODE_X1 = 22, - LBADD_U_IMM_0_OPCODE_X1 = 23, - LB_OPCODE_Y2 = 0, - LB_UN_0_SHUN_0_OPCODE_X1 = 10, - LB_U_OPCODE_Y2 = 1, - LB_U_UN_0_SHUN_0_OPCODE_X1 = 11, - LHADD_IMM_0_OPCODE_X1 = 24, - LHADD_U_IMM_0_OPCODE_X1 = 25, - LH_OPCODE_Y2 = 2, - LH_UN_0_SHUN_0_OPCODE_X1 = 12, - LH_U_OPCODE_Y2 = 3, - LH_U_UN_0_SHUN_0_OPCODE_X1 = 13, - LNK_SPECIAL_0_OPCODE_X1 = 13, - 
LWADD_IMM_0_OPCODE_X1 = 26, - LWADD_NA_IMM_0_OPCODE_X1 = 27, - LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24, - LW_OPCODE_Y2 = 4, - LW_UN_0_SHUN_0_OPCODE_X1 = 14, - MAXB_U_SPECIAL_0_OPCODE_X0 = 15, - MAXB_U_SPECIAL_0_OPCODE_X1 = 14, - MAXH_SPECIAL_0_OPCODE_X0 = 16, - MAXH_SPECIAL_0_OPCODE_X1 = 15, - MAXIB_U_IMM_0_OPCODE_X0 = 4, - MAXIB_U_IMM_0_OPCODE_X1 = 5, - MAXIH_IMM_0_OPCODE_X0 = 5, - MAXIH_IMM_0_OPCODE_X1 = 6, - MFSPR_IMM_0_OPCODE_X1 = 7, - MF_UN_0_SHUN_0_OPCODE_X1 = 15, - MINB_U_SPECIAL_0_OPCODE_X0 = 17, - MINB_U_SPECIAL_0_OPCODE_X1 = 16, - MINH_SPECIAL_0_OPCODE_X0 = 18, - MINH_SPECIAL_0_OPCODE_X1 = 17, - MINIB_U_IMM_0_OPCODE_X0 = 6, - MINIB_U_IMM_0_OPCODE_X1 = 8, - MINIH_IMM_0_OPCODE_X0 = 7, - MINIH_IMM_0_OPCODE_X1 = 9, - MM_OPCODE_X0 = 6, - MM_OPCODE_X1 = 7, - MNZB_SPECIAL_0_OPCODE_X0 = 19, - MNZB_SPECIAL_0_OPCODE_X1 = 18, - MNZH_SPECIAL_0_OPCODE_X0 = 20, - MNZH_SPECIAL_0_OPCODE_X1 = 19, - MNZ_SPECIAL_0_OPCODE_X0 = 21, - MNZ_SPECIAL_0_OPCODE_X1 = 20, - MNZ_SPECIAL_1_OPCODE_Y0 = 0, - MNZ_SPECIAL_1_OPCODE_Y1 = 1, - MOVEI_IMM_1_OPCODE_SN = 0, - MOVE_RR_IMM_0_OPCODE_SN = 8, - MTSPR_IMM_0_OPCODE_X1 = 10, - MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22, - MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0, - MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23, - MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24, - MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1, - MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25, - MULHH_SS_SPECIAL_0_OPCODE_X0 = 26, - MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0, - MULHH_SU_SPECIAL_0_OPCODE_X0 = 27, - MULHH_UU_SPECIAL_0_OPCODE_X0 = 28, - MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1, - MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29, - MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30, - MULHLA_US_SPECIAL_0_OPCODE_X0 = 31, - MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32, - MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33, - MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0, - MULHL_SS_SPECIAL_0_OPCODE_X0 = 34, - MULHL_SU_SPECIAL_0_OPCODE_X0 = 35, - MULHL_US_SPECIAL_0_OPCODE_X0 = 36, - MULHL_UU_SPECIAL_0_OPCODE_X0 = 37, - MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38, - MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2, - 
MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39, - MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40, - MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3, - MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41, - MULLL_SS_SPECIAL_0_OPCODE_X0 = 42, - MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2, - MULLL_SU_SPECIAL_0_OPCODE_X0 = 43, - MULLL_UU_SPECIAL_0_OPCODE_X0 = 44, - MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3, - MVNZ_SPECIAL_0_OPCODE_X0 = 45, - MVNZ_SPECIAL_1_OPCODE_Y0 = 1, - MVZ_SPECIAL_0_OPCODE_X0 = 46, - MVZ_SPECIAL_1_OPCODE_Y0 = 2, - MZB_SPECIAL_0_OPCODE_X0 = 47, - MZB_SPECIAL_0_OPCODE_X1 = 21, - MZH_SPECIAL_0_OPCODE_X0 = 48, - MZH_SPECIAL_0_OPCODE_X1 = 22, - MZ_SPECIAL_0_OPCODE_X0 = 49, - MZ_SPECIAL_0_OPCODE_X1 = 23, - MZ_SPECIAL_1_OPCODE_Y0 = 3, - MZ_SPECIAL_1_OPCODE_Y1 = 2, - NAP_UN_0_SHUN_0_OPCODE_X1 = 16, - NOP_NOREG_RR_IMM_0_OPCODE_SN = 2, - NOP_UN_0_SHUN_0_OPCODE_X0 = 6, - NOP_UN_0_SHUN_0_OPCODE_X1 = 17, - NOP_UN_0_SHUN_0_OPCODE_Y0 = 6, - NOP_UN_0_SHUN_0_OPCODE_Y1 = 3, - NOREG_RR_IMM_0_OPCODE_SN = 0, - NOR_SPECIAL_0_OPCODE_X0 = 50, - NOR_SPECIAL_0_OPCODE_X1 = 24, - NOR_SPECIAL_2_OPCODE_Y0 = 1, - NOR_SPECIAL_2_OPCODE_Y1 = 1, - ORI_IMM_0_OPCODE_X0 = 8, - ORI_IMM_0_OPCODE_X1 = 11, - ORI_OPCODE_Y0 = 11, - ORI_OPCODE_Y1 = 9, - OR_SPECIAL_0_OPCODE_X0 = 51, - OR_SPECIAL_0_OPCODE_X1 = 25, - OR_SPECIAL_2_OPCODE_Y0 = 2, - OR_SPECIAL_2_OPCODE_Y1 = 2, - PACKBS_U_SPECIAL_0_OPCODE_X0 = 103, - PACKBS_U_SPECIAL_0_OPCODE_X1 = 73, - PACKHB_SPECIAL_0_OPCODE_X0 = 52, - PACKHB_SPECIAL_0_OPCODE_X1 = 26, - PACKHS_SPECIAL_0_OPCODE_X0 = 102, - PACKHS_SPECIAL_0_OPCODE_X1 = 72, - PACKLB_SPECIAL_0_OPCODE_X0 = 53, - PACKLB_SPECIAL_0_OPCODE_X1 = 27, - PCNT_UN_0_SHUN_0_OPCODE_X0 = 7, - PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7, - RLI_SHUN_0_OPCODE_X0 = 1, - RLI_SHUN_0_OPCODE_X1 = 1, - RLI_SHUN_0_OPCODE_Y0 = 1, - RLI_SHUN_0_OPCODE_Y1 = 1, - RL_SPECIAL_0_OPCODE_X0 = 54, - RL_SPECIAL_0_OPCODE_X1 = 28, - RL_SPECIAL_3_OPCODE_Y0 = 0, - RL_SPECIAL_3_OPCODE_Y1 = 0, - RR_IMM_0_OPCODE_SN = 0, - S1A_SPECIAL_0_OPCODE_X0 = 55, - S1A_SPECIAL_0_OPCODE_X1 = 29, - S1A_SPECIAL_0_OPCODE_Y0 = 
1, - S1A_SPECIAL_0_OPCODE_Y1 = 1, - S2A_SPECIAL_0_OPCODE_X0 = 56, - S2A_SPECIAL_0_OPCODE_X1 = 30, - S2A_SPECIAL_0_OPCODE_Y0 = 2, - S2A_SPECIAL_0_OPCODE_Y1 = 2, - S3A_SPECIAL_0_OPCODE_X0 = 57, - S3A_SPECIAL_0_OPCODE_X1 = 31, - S3A_SPECIAL_5_OPCODE_Y0 = 1, - S3A_SPECIAL_5_OPCODE_Y1 = 1, - SADAB_U_SPECIAL_0_OPCODE_X0 = 58, - SADAH_SPECIAL_0_OPCODE_X0 = 59, - SADAH_U_SPECIAL_0_OPCODE_X0 = 60, - SADB_U_SPECIAL_0_OPCODE_X0 = 61, - SADH_SPECIAL_0_OPCODE_X0 = 62, - SADH_U_SPECIAL_0_OPCODE_X0 = 63, - SBADD_IMM_0_OPCODE_X1 = 28, - SB_OPCODE_Y2 = 5, - SB_SPECIAL_0_OPCODE_X1 = 32, - SEQB_SPECIAL_0_OPCODE_X0 = 64, - SEQB_SPECIAL_0_OPCODE_X1 = 33, - SEQH_SPECIAL_0_OPCODE_X0 = 65, - SEQH_SPECIAL_0_OPCODE_X1 = 34, - SEQIB_IMM_0_OPCODE_X0 = 9, - SEQIB_IMM_0_OPCODE_X1 = 12, - SEQIH_IMM_0_OPCODE_X0 = 10, - SEQIH_IMM_0_OPCODE_X1 = 13, - SEQI_IMM_0_OPCODE_X0 = 11, - SEQI_IMM_0_OPCODE_X1 = 14, - SEQI_OPCODE_Y0 = 12, - SEQI_OPCODE_Y1 = 10, - SEQ_SPECIAL_0_OPCODE_X0 = 66, - SEQ_SPECIAL_0_OPCODE_X1 = 35, - SEQ_SPECIAL_5_OPCODE_Y0 = 2, - SEQ_SPECIAL_5_OPCODE_Y1 = 2, - SHADD_IMM_0_OPCODE_X1 = 29, - SHL8II_IMM_0_OPCODE_SN = 3, - SHLB_SPECIAL_0_OPCODE_X0 = 67, - SHLB_SPECIAL_0_OPCODE_X1 = 36, - SHLH_SPECIAL_0_OPCODE_X0 = 68, - SHLH_SPECIAL_0_OPCODE_X1 = 37, - SHLIB_SHUN_0_OPCODE_X0 = 2, - SHLIB_SHUN_0_OPCODE_X1 = 2, - SHLIH_SHUN_0_OPCODE_X0 = 3, - SHLIH_SHUN_0_OPCODE_X1 = 3, - SHLI_SHUN_0_OPCODE_X0 = 4, - SHLI_SHUN_0_OPCODE_X1 = 4, - SHLI_SHUN_0_OPCODE_Y0 = 2, - SHLI_SHUN_0_OPCODE_Y1 = 2, - SHL_SPECIAL_0_OPCODE_X0 = 69, - SHL_SPECIAL_0_OPCODE_X1 = 38, - SHL_SPECIAL_3_OPCODE_Y0 = 1, - SHL_SPECIAL_3_OPCODE_Y1 = 1, - SHR1_RR_IMM_0_OPCODE_SN = 9, - SHRB_SPECIAL_0_OPCODE_X0 = 70, - SHRB_SPECIAL_0_OPCODE_X1 = 39, - SHRH_SPECIAL_0_OPCODE_X0 = 71, - SHRH_SPECIAL_0_OPCODE_X1 = 40, - SHRIB_SHUN_0_OPCODE_X0 = 5, - SHRIB_SHUN_0_OPCODE_X1 = 5, - SHRIH_SHUN_0_OPCODE_X0 = 6, - SHRIH_SHUN_0_OPCODE_X1 = 6, - SHRI_SHUN_0_OPCODE_X0 = 7, - SHRI_SHUN_0_OPCODE_X1 = 7, - SHRI_SHUN_0_OPCODE_Y0 = 3, - 
SHRI_SHUN_0_OPCODE_Y1 = 3, - SHR_SPECIAL_0_OPCODE_X0 = 72, - SHR_SPECIAL_0_OPCODE_X1 = 41, - SHR_SPECIAL_3_OPCODE_Y0 = 2, - SHR_SPECIAL_3_OPCODE_Y1 = 2, - SHUN_0_OPCODE_X0 = 7, - SHUN_0_OPCODE_X1 = 8, - SHUN_0_OPCODE_Y0 = 13, - SHUN_0_OPCODE_Y1 = 11, - SH_OPCODE_Y2 = 6, - SH_SPECIAL_0_OPCODE_X1 = 42, - SLTB_SPECIAL_0_OPCODE_X0 = 73, - SLTB_SPECIAL_0_OPCODE_X1 = 43, - SLTB_U_SPECIAL_0_OPCODE_X0 = 74, - SLTB_U_SPECIAL_0_OPCODE_X1 = 44, - SLTEB_SPECIAL_0_OPCODE_X0 = 75, - SLTEB_SPECIAL_0_OPCODE_X1 = 45, - SLTEB_U_SPECIAL_0_OPCODE_X0 = 76, - SLTEB_U_SPECIAL_0_OPCODE_X1 = 46, - SLTEH_SPECIAL_0_OPCODE_X0 = 77, - SLTEH_SPECIAL_0_OPCODE_X1 = 47, - SLTEH_U_SPECIAL_0_OPCODE_X0 = 78, - SLTEH_U_SPECIAL_0_OPCODE_X1 = 48, - SLTE_SPECIAL_0_OPCODE_X0 = 79, - SLTE_SPECIAL_0_OPCODE_X1 = 49, - SLTE_SPECIAL_4_OPCODE_Y0 = 0, - SLTE_SPECIAL_4_OPCODE_Y1 = 0, - SLTE_U_SPECIAL_0_OPCODE_X0 = 80, - SLTE_U_SPECIAL_0_OPCODE_X1 = 50, - SLTE_U_SPECIAL_4_OPCODE_Y0 = 1, - SLTE_U_SPECIAL_4_OPCODE_Y1 = 1, - SLTH_SPECIAL_0_OPCODE_X0 = 81, - SLTH_SPECIAL_0_OPCODE_X1 = 51, - SLTH_U_SPECIAL_0_OPCODE_X0 = 82, - SLTH_U_SPECIAL_0_OPCODE_X1 = 52, - SLTIB_IMM_0_OPCODE_X0 = 12, - SLTIB_IMM_0_OPCODE_X1 = 15, - SLTIB_U_IMM_0_OPCODE_X0 = 13, - SLTIB_U_IMM_0_OPCODE_X1 = 16, - SLTIH_IMM_0_OPCODE_X0 = 14, - SLTIH_IMM_0_OPCODE_X1 = 17, - SLTIH_U_IMM_0_OPCODE_X0 = 15, - SLTIH_U_IMM_0_OPCODE_X1 = 18, - SLTI_IMM_0_OPCODE_X0 = 16, - SLTI_IMM_0_OPCODE_X1 = 19, - SLTI_OPCODE_Y0 = 14, - SLTI_OPCODE_Y1 = 12, - SLTI_U_IMM_0_OPCODE_X0 = 17, - SLTI_U_IMM_0_OPCODE_X1 = 20, - SLTI_U_OPCODE_Y0 = 15, - SLTI_U_OPCODE_Y1 = 13, - SLT_SPECIAL_0_OPCODE_X0 = 83, - SLT_SPECIAL_0_OPCODE_X1 = 53, - SLT_SPECIAL_4_OPCODE_Y0 = 2, - SLT_SPECIAL_4_OPCODE_Y1 = 2, - SLT_U_SPECIAL_0_OPCODE_X0 = 84, - SLT_U_SPECIAL_0_OPCODE_X1 = 54, - SLT_U_SPECIAL_4_OPCODE_Y0 = 3, - SLT_U_SPECIAL_4_OPCODE_Y1 = 3, - SNEB_SPECIAL_0_OPCODE_X0 = 85, - SNEB_SPECIAL_0_OPCODE_X1 = 55, - SNEH_SPECIAL_0_OPCODE_X0 = 86, - SNEH_SPECIAL_0_OPCODE_X1 = 56, - 
SNE_SPECIAL_0_OPCODE_X0 = 87, - SNE_SPECIAL_0_OPCODE_X1 = 57, - SNE_SPECIAL_5_OPCODE_Y0 = 3, - SNE_SPECIAL_5_OPCODE_Y1 = 3, - SPECIAL_0_OPCODE_X0 = 0, - SPECIAL_0_OPCODE_X1 = 1, - SPECIAL_0_OPCODE_Y0 = 1, - SPECIAL_0_OPCODE_Y1 = 1, - SPECIAL_1_OPCODE_Y0 = 2, - SPECIAL_1_OPCODE_Y1 = 2, - SPECIAL_2_OPCODE_Y0 = 3, - SPECIAL_2_OPCODE_Y1 = 3, - SPECIAL_3_OPCODE_Y0 = 4, - SPECIAL_3_OPCODE_Y1 = 4, - SPECIAL_4_OPCODE_Y0 = 5, - SPECIAL_4_OPCODE_Y1 = 5, - SPECIAL_5_OPCODE_Y0 = 6, - SPECIAL_5_OPCODE_Y1 = 6, - SPECIAL_6_OPCODE_Y0 = 7, - SPECIAL_7_OPCODE_Y0 = 8, - SRAB_SPECIAL_0_OPCODE_X0 = 88, - SRAB_SPECIAL_0_OPCODE_X1 = 58, - SRAH_SPECIAL_0_OPCODE_X0 = 89, - SRAH_SPECIAL_0_OPCODE_X1 = 59, - SRAIB_SHUN_0_OPCODE_X0 = 8, - SRAIB_SHUN_0_OPCODE_X1 = 8, - SRAIH_SHUN_0_OPCODE_X0 = 9, - SRAIH_SHUN_0_OPCODE_X1 = 9, - SRAI_SHUN_0_OPCODE_X0 = 10, - SRAI_SHUN_0_OPCODE_X1 = 10, - SRAI_SHUN_0_OPCODE_Y0 = 4, - SRAI_SHUN_0_OPCODE_Y1 = 4, - SRA_SPECIAL_0_OPCODE_X0 = 90, - SRA_SPECIAL_0_OPCODE_X1 = 60, - SRA_SPECIAL_3_OPCODE_Y0 = 3, - SRA_SPECIAL_3_OPCODE_Y1 = 3, - SUBBS_U_SPECIAL_0_OPCODE_X0 = 100, - SUBBS_U_SPECIAL_0_OPCODE_X1 = 70, - SUBB_SPECIAL_0_OPCODE_X0 = 91, - SUBB_SPECIAL_0_OPCODE_X1 = 61, - SUBHS_SPECIAL_0_OPCODE_X0 = 101, - SUBHS_SPECIAL_0_OPCODE_X1 = 71, - SUBH_SPECIAL_0_OPCODE_X0 = 92, - SUBH_SPECIAL_0_OPCODE_X1 = 62, - SUBS_SPECIAL_0_OPCODE_X0 = 97, - SUBS_SPECIAL_0_OPCODE_X1 = 67, - SUB_SPECIAL_0_OPCODE_X0 = 93, - SUB_SPECIAL_0_OPCODE_X1 = 63, - SUB_SPECIAL_0_OPCODE_Y0 = 3, - SUB_SPECIAL_0_OPCODE_Y1 = 3, - SWADD_IMM_0_OPCODE_X1 = 30, - SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18, - SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19, - SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20, - SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21, - SW_OPCODE_Y2 = 7, - SW_SPECIAL_0_OPCODE_X1 = 64, - TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8, - TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8, - TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9, - TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9, - TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10, - TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10, - 
TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11, - TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11, - TNS_UN_0_SHUN_0_OPCODE_X1 = 22, - UN_0_SHUN_0_OPCODE_X0 = 11, - UN_0_SHUN_0_OPCODE_X1 = 11, - UN_0_SHUN_0_OPCODE_Y0 = 5, - UN_0_SHUN_0_OPCODE_Y1 = 5, - WH64_UN_0_SHUN_0_OPCODE_X1 = 23, - XORI_IMM_0_OPCODE_X0 = 2, - XORI_IMM_0_OPCODE_X1 = 21, - XOR_SPECIAL_0_OPCODE_X0 = 94, - XOR_SPECIAL_0_OPCODE_X1 = 65, - XOR_SPECIAL_2_OPCODE_Y0 = 3, - XOR_SPECIAL_2_OPCODE_Y1 = 3 -}; - -#endif /* !_TILE_OPCODE_CONSTANTS_H */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 3eb53525bf9..67276800861 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -16,11 +16,21 @@ #define _ASM_TILE_PAGE_H #include <linux/const.h> -#include <hv/pagesize.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */ -#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL -#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE +#if defined(CONFIG_PAGE_SIZE_16KB) +#define PAGE_SHIFT 14 +#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K +#elif defined(CONFIG_PAGE_SIZE_64KB) +#define PAGE_SHIFT 16 +#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K +#else +#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL +#define CTX_PAGE_FLAG 0 +#endif +#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) @@ -28,7 +38,11 @@ #define PAGE_MASK (~(PAGE_SIZE - 1)) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#ifdef __KERNEL__ +/* + * We do define AT_SYSINFO_EHDR to support vDSO, + * but don't use the gate mechanism. 
+ */ +#define __HAVE_ARCH_GATE_AREA 1 /* * If the Kconfig doesn't specify, set a maximum zone order that @@ -39,9 +53,6 @@ #define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1) #endif -#include <hv/hypervisor.h> -#include <arch/chip.h> - #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -82,8 +93,7 @@ typedef HV_PTE pgprot_t; /* * User L2 page tables are managed as one L2 page table per page, * because we use the page allocator for them. This keeps the allocation - * simple and makes it potentially useful to implement HIGHPTE at some point. - * However, it's also inefficient, since L2 page tables are much smaller + * simple, but it's also inefficient, since L2 page tables are much smaller * than pages (currently 2KB vs 64KB). So we should revisit this. */ typedef struct page *pgtable_t; @@ -91,6 +101,10 @@ typedef struct page *pgtable_t; /* Must be a macro since it is used to create constants. */ #define __pgprot(val) hv_pte(val) +/* Rarely-used initializers, typically with a "zero" value. */ +#define __pte(x) hv_pte(x) +#define __pgd(x) hv_pte(x) + static inline u64 pgprot_val(pgprot_t pgprot) { return hv_pte_val(pgprot); @@ -110,6 +124,8 @@ static inline u64 pgd_val(pgd_t pgd) typedef HV_PTE pmd_t; +#define __pmd(x) hv_pte(x) + static inline u64 pmd_val(pmd_t pmd) { return hv_pte_val(pmd); @@ -126,14 +142,18 @@ static inline __attribute_const__ int get_order(unsigned long size) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 6 #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif +/* Allow overriding how much VA or PA the kernel will use. */ +#define MAX_PA_WIDTH CHIP_PA_WIDTH() +#define MAX_VA_WIDTH CHIP_VA_WIDTH() + /* Each memory controller has PAs distinct in their high bits. 
*/ -#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS()) +#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS()) #define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS()) #define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT) #define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)) @@ -144,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size) * We reserve the lower half of memory for user-space programs, and the * upper half for system code. We re-map all of physical memory in the * upper half, which takes a quarter of our VA space. Then we have - * the vmalloc regions. The supervisor code lives at 0xfffffff700000000, + * the vmalloc regions. The supervisor code lives at the highest address, * with the hypervisor above that. * * Loadable kernel modules are placed immediately after the static @@ -156,27 +176,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * Similarly, for now we don't play any struct page mapping games. */ -#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH() +#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH # error Too much PA to map with the VA available! #endif -#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1)) - -#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */ -#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */ -#define PAGE_OFFSET MEM_HIGH_START -#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */ -#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */ -#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */ -#define MEM_SV_INTRPT MEM_SV_START -#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */ -#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) -#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */ -/* Highest DTLB address we will use */ -#define KERNEL_HIGH_VADDR MEM_SV_START - -/* Since we don't currently provide any fixmaps, we use an impossible VA. 
*/ -#define FIXADDR_TOP MEM_HV_START +#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1))) +#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */ +#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */ +#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */ +#define _VMALLOC_START FIXADDR_TOP +#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */ +#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */ +#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024)) #else /* !__tilegx__ */ @@ -198,25 +209,18 @@ static inline __attribute_const__ int get_order(unsigned long size) * values, and after that, we show "typical" values, since the actual * addresses depend on kernel #defines. * - * MEM_HV_INTRPT 0xfe000000 - * MEM_SV_INTRPT (kernel code) 0xfd000000 + * MEM_HV_START 0xfe000000 + * MEM_SV_START (kernel code) 0xfd000000 * MEM_USER_INTRPT (user vector) 0xfc000000 - * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR) - * PKMAP_BASE 0xf7000000 (via LAST_PKMAP) - * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS) - * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE) + * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR) + * PKMAP_BASE 0xf9000000 (via LAST_PKMAP) + * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE) * mapped LOWMEM 0xc0000000 */ #define MEM_USER_INTRPT _AC(0xfc000000, UL) -#if CONFIG_KERNEL_PL == 1 -#define MEM_SV_INTRPT _AC(0xfd000000, UL) -#define MEM_HV_INTRPT _AC(0xfe000000, UL) -#else -#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) -#define MEM_SV_INTRPT _AC(0xfe000000, UL) -#define MEM_HV_INTRPT _AC(0xff000000, UL) -#endif +#define MEM_SV_START _AC(0xfd000000, UL) +#define MEM_HV_START _AC(0xfe000000, UL) #define INTRPT_SIZE 0x4000 @@ -237,7 +241,7 @@ static inline __attribute_const__ int get_order(unsigned long size) #endif /* __tilegx__ */ -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD) #ifdef CONFIG_HIGHMEM @@ -318,11 +322,12 @@ static inline int 
pfn_valid(unsigned long pfn) /* Provide as macros since these require some other headers included. */ #define page_to_pa(page) ((phys_addr_t)(page_to_pfn(page)) << PAGE_SHIFT) -#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn(kaddr)) +#define virt_to_page(kaddr) pfn_to_page(kaddr_to_pfn((void *)(kaddr))) #define page_to_virt(page) pfn_to_kaddr(page_to_pfn(page)) struct mm_struct; extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); +extern pte_t *virt_to_kpte(unsigned long kaddr); #endif /* !__ASSEMBLY__ */ @@ -331,6 +336,4 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); #include <asm-generic/memory_model.h> -#endif /* __KERNEL__ */ - #endif /* _ASM_TILE_PAGE_H */ diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h deleted file mode 100644 index 965d4542797..00000000000 --- a/arch/tile/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index c3fc458a0d3..dfedd7ac729 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,7 +15,11 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H +#include <linux/dma-mapping.h> #include <linux/pci.h> +#include <asm-generic/pci_iomap.h> + +#ifndef __tilegx__ /* * Structure of a PCI controller (host bridge) @@ -24,7 +28,6 @@ struct pci_controller { int index; /* PCI domain number */ struct pci_bus *root_bus; - int first_busno; int last_busno; int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ @@ -40,20 +43,161 @@ struct pci_controller { }; /* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +extern int tile_plx_gen1; + +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} + +#define TILE_NUM_PCIE 2 + +/* * The hypervisor maps the entirety of CPA-space as bus addresses, so * bus addresses are physical addresses. 
The networking and block * device layers use this boolean for bounce buffer decisions. */ #define PCI_DMA_BUS_IS_PHYS 1 -int __init tile_pci_init(void); +/* generic pci stuff */ +#include <asm-generic/pci.h> -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} +#else -void __devinit pcibios_fixup_bus(struct pci_bus *bus); +#include <asm/page.h> +#include <gxio/trio.h> -#define TILE_NUM_PCIE 2 +/** + * We reserve the hugepage-size address range at the top of the 64-bit address + * space to serve as the PCI window, emulating the BAR0 space of an endpoint + * device. This window is used by the chip-to-chip applications running on + * the RC node. The reason for carving out this window is that Mem-Maps that + * back up this window will not overlap with those that map the real physical + * memory. + */ +#define PCIE_HOST_BAR0_SIZE HPAGE_SIZE +#define PCIE_HOST_BAR0_START HPAGE_MASK + +/** + * The first PAGE_SIZE of the above "BAR" window is mapped to the + * gxpci_host_regs structure. + */ +#define PCIE_HOST_REGS_SIZE PAGE_SIZE + +/* + * This is the PCI address where the Mem-Map interrupt regions start. + * We use the 2nd to the last huge page of the 64-bit address space. + * The last huge page is used for the rootcomplex "bar", for C2C purpose. + */ +#define MEM_MAP_INTR_REGIONS_BASE (HPAGE_MASK - HPAGE_SIZE) + +/* + * Each Mem-Map interrupt region occupies 4KB. + */ +#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT) + +/* + * Allocate the PCI BAR window right below 4GB. + */ +#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32) + +/* + * Allocate 1GB for the PCI BAR window. + */ +#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30) + +/* + * This is the highest bus address targeting the host memory that + * can be generated by legacy PCI devices with 32-bit or less + * DMA capability, dictated by the BAR window size and location. 
+ */ +#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \ + (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1) + +/* + * We shift the PCI bus range for all the physical memory up by the whole PA + * range. The corresponding CPA of an incoming PCI request will be the PCI + * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies + * that the 64-bit capable devices will be given DMA addresses as + * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit + * devices, we create a separate map region that handles the low + * 4GB. + * + * This design lets us avoid the "PCI hole" problem where the host bridge + * won't pass DMA traffic with target addresses that happen to fall within the + * BAR space. This enables us to use all the physical memory for DMA, instead + * of wasting the same amount of physical memory as the BAR window size. + */ +#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH()) + +/* + * Start of the PCI memory resource, which starts at the end of the + * maximum system physical RAM address. + */ +#define TILE_PCI_MEM_START (1ULL << CHIP_PA_WIDTH()) + +/* + * Structure of a PCI controller (host bridge) on Gx. + */ +struct pci_controller { + + /* Pointer back to the TRIO that this PCIe port is connected to. */ + gxio_trio_context_t *trio; + int mac; /* PCIe mac index on the TRIO shim */ + int trio_index; /* Index of TRIO shim that contains the MAC. */ + + int pio_mem_index; /* PIO region index for memory access */ + +#ifdef CONFIG_TILE_PCI_IO + int pio_io_index; /* PIO region index for I/O space access */ +#endif + + /* + * Mem-Map regions for all the memory controllers so that Linux can + * map all of its physical memory space to the PCI bus. + */ + int mem_maps[MAX_NUMNODES]; + + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + /* PCI I/O space resource for this controller. */ + struct resource io_space; + char io_space_name[32]; + + /* PCI memory space resource for this controller. 
*/ + struct resource mem_space; + char mem_space_name[32]; + + uint64_t mem_offset; /* cpu->bus memory mapping offset. */ + + int first_busno; + + struct pci_ops *ops; + + /* Table that maps the INTx numbers to Linux irq numbers. */ + int irq_intx_table[4]; +}; + +extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; +extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; +extern int num_trio_shims; + +extern void pci_iounmap(struct pci_dev *dev, void __iomem *); + +/* + * The PCI address space does not equal the physical memory address + * space (we have an IOMMU). The IDE and SCSI device layers use this + * boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS 0 + +#endif /* __tilegx__ */ + +int __init tile_pci_init(void); +int __init pcibios_init(void); + +void pcibios_fixup_bus(struct pci_bus *bus); #define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index) @@ -75,21 +219,9 @@ static inline int pcibios_assign_all_busses(void) return 1; } -/* - * No special bus mastering setup handling. - */ -static inline void pcibios_set_master(struct pci_dev *dev) -{ -} - #define PCIBIOS_MIN_MEM 0 -#define PCIBIOS_MIN_IO 0 - -/* - * This flag tells if the platform is TILEmpower that needs - * special configuration for the PLX switch chip. - */ -extern int tile_plx_gen1; +/* Minimum PCI I/O address, starting at the page boundary. */ +#define PCIBIOS_MIN_IO PAGE_SIZE /* Use any cpu for PCI. 
*/ #define cpumask_of_pcibus(bus) cpu_online_mask @@ -97,7 +229,4 @@ extern int tile_plx_gen1; /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include <asm-generic/pci-dma-compat.h> -/* generic pci stuff */ -#include <asm-generic/pci.h> - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h index 63294f5a8ef..4f7ae39fa20 100644 --- a/arch/tile/include/asm/percpu.h +++ b/arch/tile/include/asm/percpu.h @@ -15,9 +15,37 @@ #ifndef _ASM_TILE_PERCPU_H #define _ASM_TILE_PERCPU_H -register unsigned long __my_cpu_offset __asm__("tp"); -#define __my_cpu_offset __my_cpu_offset -#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp)) +register unsigned long my_cpu_offset_reg asm("tp"); + +#ifdef CONFIG_PREEMPT +/* + * For full preemption, we can't just use the register variable + * directly, since we need barrier() to hazard against it, causing the + * compiler to reload anything computed from a previous "tp" value. + * But we also don't want to use volatile asm, since we'd like the + * compiler to be able to cache the value across multiple percpu reads. + * So we use a fake stack read as a hazard against barrier(). + * The 'U' constraint is like 'm' but disallows postincrement. + */ +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long tp; + register unsigned long *sp asm("sp"); + asm("move %0, tp" : "=r" (tp) : "U" (*sp)); + return tp; +} +#define __my_cpu_offset __my_cpu_offset() +#else +/* + * We don't need to hazard against barrier() since "tp" doesn't ever + * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only + * changes at function call points, at which we are already re-reading + * the value of "tp" due to "my_cpu_offset_reg" being a global variable. 
+ */ +#define __my_cpu_offset my_cpu_offset_reg +#endif + +#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp)) #include <asm-generic/percpu.h> diff --git a/arch/tile/include/asm/perf_event.h b/arch/tile/include/asm/perf_event.h new file mode 100644 index 00000000000..59c5b164e5b --- /dev/null +++ b/arch/tile/include/asm/perf_event.h @@ -0,0 +1,22 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_PERF_EVENT_H +#define _ASM_TILE_PERF_EVENT_H + +#include <linux/percpu.h> +DECLARE_PER_CPU(u64, perf_irqs); + +unsigned long handle_syscall_link_address(void); +#endif /* _ASM_TILE_PERF_EVENT_H */ diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h index e919c0bdc22..1b902508b66 100644 --- a/arch/tile/include/asm/pgalloc.h +++ b/arch/tile/include/asm/pgalloc.h @@ -19,24 +19,24 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <asm/fixmap.h> +#include <asm/page.h> #include <hv/hypervisor.h> /* Bits for the size of the second-level page table. */ -#define L2_KERNEL_PGTABLE_SHIFT \ - (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE) +#define L2_KERNEL_PGTABLE_SHIFT _HV_LOG2_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) + +/* How big is a kernel L2 page table? */ +#define L2_KERNEL_PGTABLE_SIZE (1UL << L2_KERNEL_PGTABLE_SHIFT) /* We currently allocate user L2 page tables by page (unlike kernel L2s). 
*/ -#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL -#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL +#if L2_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT +#define L2_USER_PGTABLE_SHIFT PAGE_SHIFT #else #define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT #endif /* How many pages do we need, as an "order", for a user L2 page table? */ -#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL) - -/* How big is a kernel L2 page table? */ -#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT) +#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - PAGE_SHIFT) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { @@ -50,14 +50,14 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *ptep) { - set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN, + set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(__pa(ptep)), __pgprot(_PAGE_PRESENT))); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page) { - set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)), + set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(PFN_PHYS(page_to_pfn(page))), __pgprot(_PAGE_PRESENT))); } @@ -68,8 +68,20 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address); -extern void pte_free(struct mm_struct *mm, struct page *pte); +extern pgtable_t pgtable_alloc_one(struct mm_struct *mm, unsigned long address, + int order); +extern void pgtable_free(struct mm_struct *mm, struct page *pte, int order); + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER); +} + +static inline void pte_free(struct mm_struct *mm, struct page *pte) +{ + pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER); +} #define 
pmd_pgtable(pmd) pmd_page(pmd) @@ -85,8 +97,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) pte_free(mm, virt_to_page(pte)); } -extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, - unsigned long address); +extern void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address, int order); +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address) +{ + __pgtable_free_tlb(tlb, pte, address, L2_USER_PGTABLE_ORDER); +} #define check_pgt_cache() do { } while (0) @@ -104,19 +121,44 @@ void shatter_pmd(pmd_t *pmd); void shatter_huge_page(unsigned long addr); #ifdef __tilegx__ -/* We share a single page allocator for both L1 and L2 page tables. */ -#if HV_L1_SIZE != HV_L2_SIZE -# error Rework assumption that L1 and L2 page tables are same size. -#endif -#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER + #define pud_populate(mm, pud, pmd) \ pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd)) -#define pmd_alloc_one(mm, addr) \ - ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr)))) -#define pmd_free(mm, pmdp) \ - pte_free((mm), virt_to_page(pmdp)) -#define __pmd_free_tlb(tlb, pmdp, address) \ - __pte_free_tlb((tlb), virt_to_page(pmdp), (address)) + +/* Bits for the size of the L1 (intermediate) page table. */ +#define L1_KERNEL_PGTABLE_SHIFT _HV_LOG2_L1_SIZE(HPAGE_SHIFT) + +/* How big is a kernel L1 page table? */ +#define L1_KERNEL_PGTABLE_SIZE (1UL << L1_KERNEL_PGTABLE_SHIFT) + +/* We currently allocate L1 page tables by page. */ +#if L1_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT +#define L1_USER_PGTABLE_SHIFT PAGE_SHIFT +#else +#define L1_USER_PGTABLE_SHIFT L1_KERNEL_PGTABLE_SHIFT #endif +/* How many pages do we need, as an "order", for an L1 page table? 
*/ +#define L1_USER_PGTABLE_ORDER (L1_USER_PGTABLE_SHIFT - PAGE_SHIFT) + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *p = pgtable_alloc_one(mm, address, L1_USER_PGTABLE_ORDER); + return (pmd_t *)page_to_virt(p); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) +{ + pgtable_free(mm, virt_to_page(pmdp), L1_USER_PGTABLE_ORDER); +} + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, + unsigned long address) +{ + __pgtable_free_tlb(tlb, virt_to_page(pmdp), address, + L1_USER_PGTABLE_ORDER); +} + +#endif /* __tilegx__ */ + #endif /* _ASM_TILE_PGALLOC_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 1a20b7ef8ea..33587f16c15 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -27,9 +27,10 @@ #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> +#include <linux/pfn.h> #include <asm/processor.h> #include <asm/fixmap.h> -#include <asm/system.h> +#include <asm/page.h> struct mm_struct; struct vm_area_struct; @@ -70,6 +71,7 @@ extern void set_page_homes(void); #define _PAGE_PRESENT HV_PTE_PRESENT #define _PAGE_HUGE_PAGE HV_PTE_PAGE +#define _PAGE_SUPER_PAGE HV_PTE_SUPER #define _PAGE_READABLE HV_PTE_READABLE #define _PAGE_WRITABLE HV_PTE_WRITABLE #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE @@ -86,6 +88,7 @@ extern void set_page_homes(void); #define _PAGE_ALL (\ _PAGE_PRESENT | \ _PAGE_HUGE_PAGE | \ + _PAGE_SUPER_PAGE | \ _PAGE_READABLE | \ _PAGE_WRITABLE | \ _PAGE_EXECUTABLE | \ @@ -163,7 +166,7 @@ extern void set_page_homes(void); (pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val } /* Just setting the PFN to zero suffices. */ -#define pte_pgprot(x) hv_pte_set_pfn((x), 0) +#define pte_pgprot(x) hv_pte_set_pa((x), 0) /* * For PTEs and PDEs, we must clear the Present bit first when @@ -188,6 +191,7 @@ static inline void __pte_clear(pte_t *ptep) * Undefined behaviour if not.. 
*/ #define pte_present hv_pte_get_present +#define pte_mknotpresent hv_pte_clear_present #define pte_user hv_pte_get_user #define pte_read hv_pte_get_readable #define pte_dirty hv_pte_get_dirty @@ -195,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep) #define pte_write hv_pte_get_writable #define pte_exec hv_pte_get_executable #define pte_huge hv_pte_get_page +#define pte_super hv_pte_get_super #define pte_rdprotect hv_pte_clear_readable #define pte_exprotect hv_pte_clear_executable #define pte_mkclean hv_pte_clear_dirty @@ -207,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep) #define pte_mkyoung hv_pte_set_accessed #define pte_mkwrite hv_pte_set_writable #define pte_mkhuge hv_pte_set_page +#define pte_mksuper hv_pte_set_super #define pte_special(pte) 0 #define pte_mkspecial(pte) (pte) @@ -262,7 +268,7 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return hv_pte_get_pfn(pte); + return PFN_DOWN(hv_pte_get_pa(pte)); } /* Set or get the remote cache cpu in a pgprot with remote caching. */ @@ -271,7 +277,7 @@ extern int get_remote_cache_cpu(pgprot_t prot); static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) { - return hv_pte_set_pfn(prot, pfn); + return hv_pte_set_pa(prot, PFN_PHYS(pfn)); } /* Support for priority mappings. 
*/ @@ -313,7 +319,7 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return pfn_pte(hv_pte_get_pfn(pte), newprot); + return pfn_pte(pte_pfn(pte), newprot); } /* @@ -336,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -#if defined(CONFIG_HIGHPTE) -extern pte_t *pte_offset_map(pmd_t *, unsigned long address); -#define pte_unmap(pte) kunmap_atomic(pte) -#else #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) #define pte_unmap(pte) do { } while (0) -#endif /* Clear a non-executable kernel PTE and flush it from the TLB. */ #define kpte_clear_flush(ptep, vaddr) \ @@ -361,9 +362,6 @@ do { \ #define kern_addr_valid(addr) (1) #endif /* CONFIG_FLATMEM */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - extern void vmalloc_sync_all(void); #endif /* !__ASSEMBLY__ */ @@ -411,6 +409,46 @@ static inline unsigned long pmd_index(unsigned long address) return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return ptep_test_and_clear_young(vma, address, pmdp_ptep(pmdp)); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + ptep_set_wrprotect(mm, address, pmdp_ptep(pmdp)); +} + + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp) +{ + return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp))); +} + +static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_pte(pmdp_ptep(pmdp), pmd_pte(pmdval)); +} + +#define set_pmd_at(mm, addr, pmdp, pmdval) __set_pmd(pmdp, pmdval) + 
+/* Create a pmd from a PTFN. */ +static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) +{ + return pte_pmd(hv_pte_set_ptfn(prot, ptfn)); +} + +/* Return the page-table frame number (ptfn) that a pmd_t points at. */ +#define pmd_ptfn(pmd) hv_pte_get_ptfn(pmd_pte(pmd)) + /* * A given kernel pmd_t maps to a specific virtual address (either a * kernel huge page or a kernel pte_t table). Since kernel pte_t @@ -431,7 +469,48 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * OK for pte_lockptr(), since we just end up with potentially one * lock being used for several pte_t arrays. */ -#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd))) +#define pmd_page(pmd) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pmd_ptfn(pmd)))) + +static inline void pmd_clear(pmd_t *pmdp) +{ + __pte_clear(pmdp_ptep(pmdp)); +} + +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define __HAVE_ARCH_PMD_WRITE + +#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + return pfn_pmd(pmd_pfn(pmd), newprot); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define has_transparent_hugepage() 1 +#define pmd_trans_huge pmd_huge_page + +static inline pmd_t pmd_mksplitting(pmd_t pmd) +{ + return pte_pmd(hv_pte_set_client2(pmd_pte(pmd))); +} + +static inline int pmd_trans_splitting(pmd_t 
pmd) +{ + return hv_pte_get_client2(pmd_pte(pmd)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] @@ -449,17 +528,13 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); } -static inline int pmd_huge_page(pmd_t pmd) -{ - return pmd_val(pmd) & _PAGE_HUGE_PAGE; -} - #include <asm-generic/pgtable.h> /* Support /proc/NN/pgtable API. */ struct seq_file; int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, - unsigned long vaddr, pte_t *ptep, void **datap); + unsigned long vaddr, unsigned long pagesize, + pte_t *ptep, void **datap); #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index 9f98529761f..d26a4227903 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -20,11 +20,12 @@ * The level-1 index is defined by the huge page size. A PGD is composed * of PTRS_PER_PGD pgd_t's and is the top level of the page table. */ -#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE -#define PGDIR_SIZE HV_PAGE_SIZE_LARGE +#define PGDIR_SHIFT HPAGE_SHIFT +#define PGDIR_SIZE HPAGE_SIZE #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) -#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) +#define PTRS_PER_PGD _HV_L1_ENTRIES(HPAGE_SHIFT) +#define PGD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT) +#define SIZEOF_PGD _HV_L1_SIZE(HPAGE_SHIFT) /* * The level-2 index is defined by the difference between the huge @@ -33,8 +34,9 @@ * Note that the hypervisor docs use PTE for what we call pte_t, so * this nomenclature is somewhat confusing. 
*/ -#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) -#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) +#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT) +#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT) +#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) #ifndef __ASSEMBLY__ @@ -53,17 +55,9 @@ #define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK) #ifdef CONFIG_HIGHMEM -# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) +# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1)) #else -# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1)) -#endif - -#ifdef CONFIG_HUGEVMAP -#define HUGE_VMAP_END __VMAPPING_END -#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE) -#define _VMALLOC_END HUGE_VMAP_BASE -#else -#define _VMALLOC_END __VMAPPING_END +# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1)) #endif /* @@ -82,10 +76,12 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; /* We have no pmd or pud since we are strictly a two-level page table */ #include <asm-generic/pgtable-nopmd.h> +static inline int pud_huge_page(pud_t pud) { return 0; } + /* We don't define any pgds for these addresses. */ static inline int pgd_addr_invalid(unsigned long addr) { - return addr >= MEM_HV_INTRPT; + return addr >= MEM_HV_START; } /* @@ -111,24 +107,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return pte; } -static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) -{ - set_pte(&pmdp->pud.pgd, pmdval.pud.pgd); -} - -/* Create a pmd from a PTFN. */ -static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) -{ - return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } }; -} - -/* Return the page-table frame number (ptfn) that a pmd_t points at. 
*/ -#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd) - -static inline void pmd_clear(pmd_t *pmdp) -{ - __pte_clear(&pmdp->pud.pgd); -} +/* + * pmds are wrappers around pgds, which are the same as ptes. + * It's often convenient to "cast" back and forth and use the pte methods, + * which are the methods supplied by the hypervisor. + */ +#define pmd_pte(pmd) ((pmd).pud.pgd) +#define pmdp_ptep(pmdp) (&(pmdp)->pud.pgd) +#define pte_pmd(pte) ((pmd_t){ { (pte) } }) #endif /* __ASSEMBLY__ */ diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h new file mode 100644 index 00000000000..2c8a9cd102d --- /dev/null +++ b/arch/tile/include/asm/pgtable_64.h @@ -0,0 +1,171 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_PGTABLE_64_H +#define _ASM_TILE_PGTABLE_64_H + +/* The level-0 page table breaks the address space into 32-bit chunks. */ +#define PGDIR_SHIFT HV_LOG2_L1_SPAN +#define PGDIR_SIZE HV_L1_SPAN +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD HV_L0_ENTRIES +#define PGD_INDEX(va) HV_L0_INDEX(va) +#define SIZEOF_PGD HV_L0_SIZE + +/* + * The level-1 index is defined by the huge page size. A PMD is composed + * of PTRS_PER_PMD pmd_t's and is the middle level of the page table. 
+ */ +#define PMD_SHIFT HPAGE_SHIFT +#define PMD_SIZE HPAGE_SIZE +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD _HV_L1_ENTRIES(HPAGE_SHIFT) +#define PMD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT) +#define SIZEOF_PMD _HV_L1_SIZE(HPAGE_SHIFT) + +/* + * The level-2 index is defined by the difference between the huge + * page size and the normal page size. A PTE is composed of + * PTRS_PER_PTE pte_t's and is the bottom level of the page table. + * Note that the hypervisor docs use PTE for what we call pte_t, so + * this nomenclature is somewhat confusing. + */ +#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT) +#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT) +#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT) + +/* + * Align the vmalloc area to an L2 page table. Omit guard pages at + * the beginning and end for simplicity (particularly in the per-cpu + * memory allocation code). The vmalloc code puts in an internal + * guard page between each allocation. + */ +#define _VMALLOC_END MEM_SV_START +#define VMALLOC_END _VMALLOC_END +#define VMALLOC_START _VMALLOC_START + +#ifndef __ASSEMBLY__ + +/* We have no pud since we are a three-level page table. */ +#include <asm-generic/pgtable-nopud.h> + +/* + * pmds are the same as pgds and ptes, so converting is a no-op. 
+ */ +#define pmd_pte(pmd) (pmd) +#define pmdp_ptep(pmdp) (pmdp) +#define pte_pmd(pte) (pte) + +#define pud_pte(pud) ((pud).pgd) + +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == 0; +} + +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) & _PAGE_PRESENT; +} + +static inline int pud_huge_page(pud_t pud) +{ + return pud_val(pud) & _PAGE_HUGE_PAGE; +} + +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e)) + +static inline void pud_clear(pud_t *pudp) +{ + __pte_clear(&pudp->pgd); +} + +static inline int pud_bad(pud_t pud) +{ + return ((pud_val(pud) & _PAGE_ALL) != _PAGE_TABLE); +} + +/* Return the page-table frame number (ptfn) that a pud_t points at. */ +#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd) + +/* Return the page frame number (pfn) that a pud_t points at. */ +#define pud_pfn(pud) pte_pfn(pud_pte(pud)) + +/* + * A given kernel pud_t maps to a kernel pmd_t table at a specific + * virtual address. Since kernel pmd_t tables can be aligned at + * sub-page granularity, this macro can return non-page-aligned + * pointers, despite its name. + */ +#define pud_page_vaddr(pud) \ + (__va((phys_addr_t)pud_ptfn(pud) << HV_LOG2_PAGE_TABLE_ALIGN)) + +/* + * A pud_t points to a pmd_t array. Since we can have multiple per + * page, we don't have a one-to-one mapping of pud_t's to pages. + */ +#define pud_page(pud) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pud_ptfn(pud)))) + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +#define pmd_offset(pud, address) \ + ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address)) + +/* Normalize an address to having the correct high bits set. 
*/ +#define pgd_addr_normalize pgd_addr_normalize +static inline unsigned long pgd_addr_normalize(unsigned long addr) +{ + return ((long)addr << (CHIP_WORD_SIZE() - CHIP_VA_WIDTH())) >> + (CHIP_WORD_SIZE() - CHIP_VA_WIDTH()); +} + +/* We don't define any pgds for these addresses. */ +static inline int pgd_addr_invalid(unsigned long addr) +{ + return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr); +} + +/* + * Use atomic instructions to provide atomicity against the hypervisor. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + return (__insn_fetchand(&ptep->val, ~HV_PTE_ACCESSED) >> + HV_PTE_INDEX_ACCESSED) & 0x1; +} + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + __insn_fetchand(&ptep->val, ~HV_PTE_WRITABLE); +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return hv_pte(__insn_exch(&ptep->val, 0UL)); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_PGTABLE_64_H */ diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h new file mode 100644 index 00000000000..7ae3956d900 --- /dev/null +++ b/arch/tile/include/asm/pmc.h @@ -0,0 +1,64 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _ASM_TILE_PMC_H +#define _ASM_TILE_PMC_H + +#include <linux/ptrace.h> + +#define TILE_BASE_COUNTERS 2 + +/* Bitfields below are derived from SPR PERF_COUNT_CTL*/ +#ifndef __tilegx__ +/* PERF_COUNT_CTL on TILEPro */ +#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */ +#define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */ +#define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */ + +#define TILE_SEL_MASK 0x7f /* 7 bits for event SEL, + COUNT_0_SEL */ +#define TILE_PLM_MASK 0x780 /* 4 bits priv level msks, + COUNT_0_MASK*/ +#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK) + +#else /* __tilegx__*/ +/* PERF_COUNT_CTL on TILEGx*/ +#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */ +#define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */ +#define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */ + +#define TILE_SEL_MASK 0x3f /* 6 bits for event SEL, + COUNT_0_SEL*/ +#define TILE_BOX_MASK 0x1c0 /* 3 bits box msks, + COUNT_0_BOX */ +#define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks, + COUNT_0_MASK */ +#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK) +#endif /* __tilegx__*/ + +/* Takes register and fault number. Returns error to disable the interrupt. 
*/ +typedef int (*perf_irq_t)(struct pt_regs *, int); + +int userspace_perf_handler(struct pt_regs *regs, int fault); + +perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq); +void release_pmc_hardware(void); + +unsigned long pmc_get_overflow(void); +void pmc_ack_overflow(unsigned long status); + +void unmask_pmc_interrupts(void); +void mask_pmc_interrupts(void); + +#endif /* _ASM_TILE_PMC_H */ diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h deleted file mode 100644 index c98509d3149..00000000000 --- a/arch/tile/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h deleted file mode 100644 index 22cae6230ce..00000000000 --- a/arch/tile/include/asm/posix_types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/posix_types.h> diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index e6889474038..42323636c45 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -15,6 +15,8 @@ #ifndef _ASM_TILE_PROCESSOR_H #define _ASM_TILE_PROCESSOR_H +#include <arch/chip.h> + #ifndef __ASSEMBLY__ /* @@ -25,7 +27,6 @@ #include <asm/ptrace.h> #include <asm/percpu.h> -#include <arch/chip.h> #include <arch/spr_def.h> struct task_struct; @@ -76,6 +77,17 @@ struct async_tlb { #ifdef CONFIG_HARDWALL struct hardwall_info; +struct hardwall_task { + /* Which hardwall is this task tied to? (or NULL if none) */ + struct hardwall_info *info; + /* Chains this task into the list at info->task_head. */ + struct list_head list; +}; +#ifdef __tilepro__ +#define HARDWALL_TYPES 1 /* udn */ +#else +#define HARDWALL_TYPES 3 /* udn, idn, and ipi */ +#endif #endif struct thread_struct { @@ -99,47 +111,38 @@ struct thread_struct { unsigned long long interrupt_mask; /* User interrupt-control 0 state */ unsigned long intctrl_0; -#if CHIP_HAS_PROC_STATUS_SPR() + /* Is this task currently doing a backtrace? 
*/ + bool in_backtrace; /* Any other miscellaneous processor state bits */ unsigned long proc_status; -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() /* Interrupt base for PL0 interrupts */ unsigned long interrupt_vector_base; #endif -#if CHIP_HAS_TILE_RTF_HWM() /* Tile cache retry fifo high-water mark */ unsigned long tile_rtf_hwm; -#endif #if CHIP_HAS_DSTREAM_PF() /* Data stream prefetch control */ unsigned long dstream_pf; #endif #ifdef CONFIG_HARDWALL - /* Is this task tied to an activated hardwall? */ - struct hardwall_info *hardwall; - /* Chains this task into the list at hardwall->list. */ - struct list_head hardwall_list; + /* Hardwall information for various resources. */ + struct hardwall_task hardwall[HARDWALL_TYPES]; #endif #if CHIP_HAS_TILE_DMA() /* Async DMA TLB fault information */ struct async_tlb dma_async_tlb; #endif -#if CHIP_HAS_SN_PROC() - /* Was static network processor when we were switched out? */ - int sn_proc_running; - /* Async SNI TLB fault information */ - struct async_tlb sn_async_tlb; -#endif }; #endif /* !__ASSEMBLY__ */ /* * Start with "sp" this many bytes below the top of the kernel stack. - * This preserves the invariant that a called function may write to *sp. + * This allows us to be cache-aware when handling the initial save + * of the pt_regs value to the stack. */ -#define STACK_TOP_DELTA 8 +#define STACK_TOP_DELTA 64 /* * When entering the kernel via a fault, start with the top of the @@ -155,7 +158,7 @@ struct thread_struct { #ifndef __ASSEMBLY__ #ifdef __tilegx__ -#define TASK_SIZE_MAX (MEM_LOW_END + 1) +#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1)) #else #define TASK_SIZE_MAX PAGE_OFFSET #endif @@ -169,10 +172,10 @@ struct thread_struct { #define TASK_SIZE TASK_SIZE_MAX #endif -/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. 
*/ -#define VDSO_BASE (TASK_SIZE - PAGE_SIZE) +#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base) +#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x)) -#define STACK_TOP VDSO_BASE +#define STACK_TOP TASK_SIZE /* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */ #define STACK_TOP_MAX TASK_SIZE_MAX @@ -202,6 +205,7 @@ static inline void start_thread(struct pt_regs *regs, { regs->pc = pc; regs->sp = usp; + single_step_execve(); } /* Free all resources held by a thread. */ @@ -210,33 +214,40 @@ static inline void release_thread(struct task_struct *dead_task) /* Nothing for now */ } -/* Prepare to copy thread state - unlazy all lazy status. */ -#define prepare_to_copy(tsk) do { } while (0) - -extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern int do_work_pending(struct pt_regs *regs, u32 flags); /* * Return saved (kernel) PC of a blocked thread. - * Only used in a printk() in kernel/sched.c, so don't work too hard. + * Only used in a printk() in kernel/sched/core.c, so don't work too hard. */ #define thread_saved_pc(t) ((t)->thread.pc) unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ -#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE) +#define task_ksp0(task) \ + ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA) /* Return some info about the user process TASK. 
*/ -#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA) #define task_pt_regs(task) \ - ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) + ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1) +#define current_pt_regs() \ + ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \ + STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1) #define task_sp(task) (task_pt_regs(task)->sp) #define task_pc(task) (task_pt_regs(task)->pc) /* Aliases for pc and sp (used in fs/proc/array.c) */ #define KSTK_EIP(task) task_pc(task) #define KSTK_ESP(task) task_sp(task) +/* Fine-grained unaligned JIT support */ +#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr)) +#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val)) + +extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); +extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); + /* Standard format for printing registers and other word-size data. */ #ifdef __tilegx__ # define REGFMT "0x%016lx" @@ -255,10 +266,6 @@ static inline void cpu_relax(void) barrier(); } -struct siginfo; -extern void arch_coredump_signal(struct siginfo *, struct pt_regs *); -#define arch_coredump_signal arch_coredump_signal - /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; @@ -269,10 +276,6 @@ extern char chip_model[64]; /* Data on which physical memory controller corresponds to which NUMA node. */ extern int node_controller[]; -/* Do we dump information to the console when a user application crashes? */ -extern int show_crashinfo; - -#if CHIP_HAS_CBOX_HOME_MAP() /* Does the heap allocator return hash-for-home pages by default? */ extern int hash_default; @@ -282,11 +285,6 @@ extern int kstack_hash; /* Does MAP_ANONYMOUS return hash-for-home pages by default? 
*/ #define uheap_hash hash_default -#else -#define hash_default 0 -#define kstack_hash 0 -#define uheap_hash 0 -#endif /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; @@ -334,7 +332,6 @@ extern int kdata_huge; /* * Provide symbolic constants for PLs. - * Note that assembly code assumes that USER_PL is zero. */ #define USER_PL 0 #if CONFIG_KERNEL_PL == 2 @@ -343,20 +340,38 @@ extern int kdata_huge; #define KERNEL_PL CONFIG_KERNEL_PL /* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ -#define CPU_LOG_MASK_VALUE 12 -#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) -#if CONFIG_NR_CPUS > CPU_MASK_VALUE -# error Too many cpus! +#ifdef __tilegx__ +#define CPU_SHIFT 48 +#if CHIP_VA_WIDTH() > CPU_SHIFT +# error Too many VA bits! #endif +#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1) #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) + ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT)) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) + ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \ + (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT))) +#define next_current_ksp0(task) ({ \ + unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \ + unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \ + __ksp0 | __cpu; \ +}) +#else +#define LOG2_NR_CPU_IDS 6 +#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1) +#define raw_smp_processor_id() \ + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID) +#define get_current_ksp0() \ + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ - BUG_ON(__ksp0 & CPU_MASK_VALUE); \ + BUG_ON(__ksp0 & MAX_CPU_ID); \ __ksp0 | __cpu; \ }) +#endif +#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1) +# error Too many cpus! 
+#endif #endif /* _ASM_TILE_PROCESSOR_H */ diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index 6be2246e015..b9620c077ab 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -11,87 +11,20 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_PTRACE_H #define _ASM_TILE_PTRACE_H -#include <arch/chip.h> -#include <arch/abi.h> - -/* These must match struct pt_regs, below. */ -#if CHIP_WORD_SIZE() == 32 -#define PTREGS_OFFSET_REG(n) ((n)*4) -#else -#define PTREGS_OFFSET_REG(n) ((n)*8) -#endif -#define PTREGS_OFFSET_BASE 0 -#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53) -#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54) -#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55) -#define PTREGS_NR_GPRS 56 -#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56) -#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57) -#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58) -#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59) -#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60) -#if CHIP_HAS_CMPEXCH() -#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61) -#endif -#define PTREGS_SIZE PTREGS_OFFSET_REG(64) +#include <linux/compiler.h> #ifndef __ASSEMBLY__ - -#ifdef __KERNEL__ /* Benefit from consistent use of "long" on all chips. */ typedef unsigned long pt_reg_t; -#else -/* Provide appropriate length type to userspace regardless of -m32/-m64. */ -typedef uint_reg_t pt_reg_t; #endif -/* - * This struct defines the way the registers are stored on the stack during a - * system call or exception. "struct sigcontext" has the same shape. - */ -struct pt_regs { - /* Saved main processor registers; 56..63 are special. */ - /* tp, sp, and lr must immediately follow regs[] for aliasing. */ - pt_reg_t regs[53]; - pt_reg_t tp; /* aliases regs[TREG_TP] */ - pt_reg_t sp; /* aliases regs[TREG_SP] */ - pt_reg_t lr; /* aliases regs[TREG_LR] */ - - /* Saved special registers. 
*/ - pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ - pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ - pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ - pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ - pt_reg_t flags; /* flags (see below) */ -#if !CHIP_HAS_CMPEXCH() - pt_reg_t pad[3]; -#else - pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */ - pt_reg_t pad[2]; -#endif -}; - -#endif /* __ASSEMBLY__ */ +#include <uapi/asm/ptrace.h> -#define PTRACE_GETREGS 12 -#define PTRACE_SETREGS 13 -#define PTRACE_GETFPREGS 14 -#define PTRACE_SETFPREGS 15 - -/* Support TILE-specific ptrace options, with events starting at 16. */ -#define PTRACE_O_TRACEMIGRATE 0x00010000 -#define PTRACE_EVENT_MIGRATE 16 -#ifdef __KERNEL__ #define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE) -#define PT_TRACE_MIGRATE 0x00080000 -#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE) -#endif - -#ifdef __KERNEL__ +#define PT_TRACE_MIGRATE PT_EVENT_FLAG(PTRACE_EVENT_MIGRATE) /* Flag bits in pt_regs.flags */ #define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */ @@ -100,19 +33,20 @@ struct pt_regs { #ifndef __ASSEMBLY__ +#define regs_return_value(regs) ((regs)->regs[0]) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +#define user_stack_pointer(regs) ((regs)->sp) /* Does the process account for user or for system time? */ -#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL) +#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL) /* Fill in a struct pt_regs with the current kernel registers. */ struct pt_regs *get_pt_regs(struct pt_regs *); /* Trace the current syscall. 
*/ -extern void do_syscall_trace(void); - -extern void show_regs(struct pt_regs *); +extern int do_syscall_trace_enter(struct pt_regs *regs); +extern void do_syscall_trace_exit(struct pt_regs *regs); #define arch_has_single_step() (1) @@ -146,8 +80,7 @@ extern void single_step_execve(void); struct task_struct; -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code); +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs); #ifdef __tilegx__ /* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ @@ -161,6 +94,4 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, #define SINGLESTEP_STATE_TARGET_LB 2 #define SINGLESTEP_STATE_TARGET_UB 7 -#endif /* !__KERNEL__ */ - #endif /* _ASM_TILE_PTRACE_H */ diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h deleted file mode 100644 index 04bc4db8921..00000000000 --- a/arch/tile/include/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h deleted file mode 100644 index 35d786fe93a..00000000000 --- a/arch/tile/include/asm/scatterlist.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/scatterlist.h> diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h index d062d463fca..5d5d3b739a6 100644 --- a/arch/tile/include/asm/sections.h +++ b/arch/tile/include/asm/sections.h @@ -25,16 +25,22 @@ extern char _sinitdata[], _einitdata[]; /* Write-once data is writable only till the end of initialization. */ extern char __w1data_begin[], __w1data_end[]; +extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char vdso32_start[], vdso32_end[]; +#endif /* Not exactly sections, but PC comparison points in the code. 
*/ extern char __rt_sigreturn[], __rt_sigreturn_end[]; -#ifndef __tilegx__ +#ifdef __tilegx__ +extern char __start_unalign_asm_code[], __end_unalign_asm_code[]; +#else extern char sys_cmpxchg[], __sys_cmpxchg_end[]; extern char __sys_cmpxchg_grab_lock[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; #endif -/* Handle the discontiguity between _sdata and _stext. */ +/* Handle the discontiguity between _sdata and _text. */ static inline int arch_is_kernel_data(unsigned long addr) { return addr >= (unsigned long)_sdata && diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h deleted file mode 100644 index 7673b83cfef..00000000000 --- a/arch/tile/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index 7caf0f36b03..e98909033e5 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -11,26 +11,42 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_SETUP_H #define _ASM_TILE_SETUP_H -#define COMMAND_LINE_SIZE 2048 - -#ifdef __KERNEL__ #include <linux/pfn.h> #include <linux/init.h> +#include <uapi/asm/setup.h> /* * Reserved space for vmalloc and iomap - defined in asm/page.h */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) +int tile_console_write(const char *buf, int count); void early_panic(const char *fmt, ...); -void warn_early_printk(void); -void __init disable_early_printk(void); -#endif /* __KERNEL__ */ +/* Init-time routine to do tile-specific per-cpu setup. 
*/ +void setup_cpu(int boot); + +/* User-level DMA management functions */ +void grant_dma_mpls(void); +void restrict_dma_mpls(void); + +#ifdef CONFIG_HARDWALL +/* User-level network management functions */ +void reset_network_state(void); +struct task_struct; +void hardwall_switch_tasks(struct task_struct *prev, struct task_struct *next); +void hardwall_deactivate_all(struct task_struct *task); +int hardwall_ipi_valid(int cpu); + +/* Hook hardwall code into changes in affinity. */ +#define arch_set_cpus_allowed(p, new_mask) do { \ + if (!cpumask_equal(&p->cpus_allowed, new_mask)) \ + hardwall_deactivate_all(p); \ +} while (0) +#endif #endif /* _ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de3..00000000000 --- a/arch/tile/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h deleted file mode 100644 index 93f30deb95d..00000000000 --- a/arch/tile/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmparam.h> diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index 81d92a45cd4..10e183de96d 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -11,23 +11,19 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. */ - #ifndef _ASM_TILE_SIGNAL_H #define _ASM_TILE_SIGNAL_H -/* Do not notify a ptracer when this signal is handled. */ -#define SA_NOPTRACE 0x02000000u - -/* Used in earlier Tilera releases, so keeping for binary compatibility. 
*/ -#define SA_RESTORER 0x04000000u +#include <uapi/asm/signal.h> -#include <asm-generic/signal.h> - -#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLY__) struct pt_regs; int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); void do_signal(struct pt_regs *regs); +void signal_fault(const char *type, struct pt_regs *, + void __user *frame, int sig); +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int signo); #endif - #endif /* _ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h index 532124ae4b1..9a326b64f7a 100644 --- a/arch/tile/include/asm/smp.h +++ b/arch/tile/include/asm/smp.h @@ -43,10 +43,6 @@ void evaluate_message(int tag); /* Boot a secondary cpu */ void online_secondary(void); -/* Call a function on a specified set of CPUs (may include this one). */ -extern void on_each_cpu_mask(const struct cpumask *mask, - void (*func)(void *), void *info, bool wait); - /* Topology of the supervisor tile grid, and coordinates of boot processor */ extern HV_Topology smp_topology; @@ -91,9 +87,6 @@ void print_disabled_cpus(void); #else /* !CONFIG_SMP */ -#define on_each_cpu_mask(mask, func, info, wait) \ - do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0) - #define smp_master_cpu 0 #define smp_height 1 #define smp_width 1 @@ -108,10 +101,8 @@ void print_disabled_cpus(void); extern struct cpumask cpu_lotar_map; #define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) -#if CHIP_HAS_CBOX_HOME_MAP() /* Which processors are used for hash-for-home mapping */ extern struct cpumask hash_for_home_map; -#endif /* Which cpus can have their cache flushed by hv_flush_remote(). 
*/ extern struct cpumask cpu_cacheable_map; diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h deleted file mode 100644 index 6b71384b9d8..00000000000 --- a/arch/tile/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h deleted file mode 100644 index def6d4746ee..00000000000 --- a/arch/tile/include/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index a8f2c6e31a8..c0a77b38d39 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -17,9 +17,8 @@ #ifndef _ASM_TILE_SPINLOCK_32_H #define _ASM_TILE_SPINLOCK_32_H -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/page.h> -#include <asm/system.h> #include <linux/compiler.h> /* diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h new file mode 100644 index 00000000000..9a12b9c7e5d --- /dev/null +++ b/arch/tile/include/asm/spinlock_64.h @@ -0,0 +1,161 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * 64-bit SMP ticket spinlocks, allowing only a single CPU anywhere + * (the type definitions are in asm/spinlock_types.h) + */ + +#ifndef _ASM_TILE_SPINLOCK_64_H +#define _ASM_TILE_SPINLOCK_64_H + +/* Shifts and masks for the various fields in "lock". 
*/ +#define __ARCH_SPIN_CURRENT_SHIFT 17 +#define __ARCH_SPIN_NEXT_MASK 0x7fff +#define __ARCH_SPIN_NEXT_OVERFLOW 0x8000 + +/* + * Return the "current" portion of a ticket lock value, + * i.e. the number that currently owns the lock. + */ +static inline u32 arch_spin_current(u32 val) +{ + return val >> __ARCH_SPIN_CURRENT_SHIFT; +} + +/* + * Return the "next" portion of a ticket lock value, + * i.e. the number that the next task to try to acquire the lock will get. + */ +static inline u32 arch_spin_next(u32 val) +{ + return val & __ARCH_SPIN_NEXT_MASK; +} + +/* The lock is locked if a task would have to wait to get it. */ +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + u32 val = lock->lock; + return arch_spin_current(val) != arch_spin_next(val); +} + +/* Bump the current ticket so the next task owns the lock. */ +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + wmb(); /* guarantee anything modified under the lock is visible */ + __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT); +} + +void arch_spin_unlock_wait(arch_spinlock_t *lock); + +void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val); + +/* Grab the "next" ticket number and bump it atomically. + * If the current ticket is not ours, go to the slow path. + * We also take the slow path if the "next" value overflows. + */ +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + u32 val = __insn_fetchadd4(&lock->lock, 1); + u32 ticket = val & (__ARCH_SPIN_NEXT_MASK | __ARCH_SPIN_NEXT_OVERFLOW); + if (unlikely(arch_spin_current(val) != ticket)) + arch_spin_lock_slow(lock, ticket); +} + +/* Try to get the lock, and return whether we succeeded. */ +int arch_spin_trylock(arch_spinlock_t *lock); + +/* We cannot take an interrupt after getting a ticket, so don't enable them. */ +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. 
+ * + * We use fetchadd() for readers, and fetchor() with the sign bit + * for writers. + */ + +#define __WRITE_LOCK_BIT (1 << 31) + +static inline int arch_write_val_locked(int val) +{ + return val < 0; /* Optimize "val & __WRITE_LOCK_BIT". */ +} + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int arch_read_can_lock(arch_rwlock_t *rw) +{ + return !arch_write_val_locked(rw->lock); +} + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +static inline int arch_write_can_lock(arch_rwlock_t *rw) +{ + return rw->lock == 0; +} + +extern void __read_lock_failed(arch_rwlock_t *rw); + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchaddgez4(&rw->lock, 1); + if (unlikely(arch_write_val_locked(val))) + __read_lock_failed(rw); +} + +extern void __write_lock_failed(arch_rwlock_t *rw, u32 val); + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + if (unlikely(val != 0)) + __write_lock_failed(rw, val); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + __insn_mf(); + __insn_fetchadd4(&rw->lock, -1); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __insn_mf(); + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. 
*/ +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + return !arch_write_val_locked(__insn_fetchaddgez4(&rw->lock, 1)); +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + u32 val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + if (likely(val == 0)) + return 1; + if (!arch_write_val_locked(val)) + __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT); + return 0; +} + +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + +#endif /* _ASM_TILE_SPINLOCK_64_H */ diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 4d97a2db932..0e9d382a2d4 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -25,7 +25,6 @@ struct KBacktraceIterator { BacktraceIterator it; struct task_struct *task; /* task we are backtracing */ - pte_t *pgtable; /* page table for user space access */ int end; /* iteration complete. */ int new_context; /* new context is starting */ int profile; /* profiling, so stop on async intrpt */ diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h deleted file mode 100644 index 0b91fe198c2..00000000000 --- a/arch/tile/include/asm/statfs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/statfs.h> diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h index 7535cf1a30e..92b271bd9eb 100644 --- a/arch/tile/include/asm/string.h +++ b/arch/tile/include/asm/string.h @@ -21,8 +21,10 @@ #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_STRCHR #define __HAVE_ARCH_STRLEN +#define __HAVE_ARCH_STRNLEN extern __kernel_size_t strlen(const char *); +extern __kernel_size_t strnlen(const char *, __kernel_size_t); extern char *strchr(const char *s, int c); extern void *memchr(const void *s, int c, size_t n); extern void *memset(void *, int, __kernel_size_t); diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h new file mode 100644 index 
00000000000..b8f888cbe6b --- /dev/null +++ b/arch/tile/include/asm/switch_to.h @@ -0,0 +1,79 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_SWITCH_TO_H +#define _ASM_TILE_SWITCH_TO_H + +#include <arch/sim_def.h> + +/* + * switch_to(n) should switch tasks to task nr n, first + * checking that n isn't the current task, in which case it does nothing. + * The number of callee-saved registers saved on the kernel stack + * is defined here for use in copy_thread() and must agree with __switch_to(). + */ +#define CALLEE_SAVED_FIRST_REG 30 +#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */ + +#ifndef __ASSEMBLY__ + +struct task_struct; + +/* + * Pause the DMA engine and static network before task switching. + */ +#define prepare_arch_switch(next) _prepare_arch_switch(next) +void _prepare_arch_switch(struct task_struct *next); + +struct task_struct; +#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next))) +extern struct task_struct *_switch_to(struct task_struct *prev, + struct task_struct *next); + +/* Helper function for _switch_to(). */ +extern struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next, + unsigned long new_system_save_k_0); + +/* Address that switched-away from tasks are at. 
*/ +extern unsigned long get_switch_to_pc(void); + +/* + * Kernel threads can check to see if they need to migrate their + * stack whenever they return from a context switch; for user + * threads, we defer until they are returning to user-space. + */ +#define finish_arch_switch(prev) do { \ + if (unlikely((prev)->state == TASK_DEAD)) \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ + ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ + (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ + if (current->mm == NULL && !kstack_hash && \ + current_thread_info()->homecache_cpu != smp_processor_id()) \ + homecache_migrate_kthread(); \ +} while (0) + +/* Support function for forking a new task. */ +void ret_from_fork(void); + +/* Support function for forking a new kernel thread. */ +void ret_from_kernel_thread(void *fn, void *arg); + +/* Called from ret_from_xxx() when a new process starts up. */ +struct task_struct *sim_notify_fork(struct task_struct *prev); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_TILE_SWITCH_TO_H */ diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h index d35e0dcb67b..9644b88f133 100644 --- a/arch/tile/include/asm/syscall.h +++ b/arch/tile/include/asm/syscall.h @@ -22,6 +22,12 @@ #include <linux/err.h> #include <arch/abi.h> +/* The array of function pointers for syscalls. */ +extern void *sys_call_table[]; +#ifdef CONFIG_COMPAT +extern void *compat_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_r0 are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h index 3b5507c31ea..07b298450ef 100644 --- a/arch/tile/include/asm/syscalls.h +++ b/arch/tile/include/asm/syscalls.h @@ -24,12 +24,6 @@ #include <linux/types.h> #include <linux/compat.h> -/* The array of function pointers for syscalls. 
*/ -extern void *sys_call_table[]; -#ifdef CONFIG_COMPAT -extern void *compat_sys_call_table[]; -#endif - /* * Note that by convention, any syscall which requires the current * register set takes an additional "struct pt_regs *" pointer; a @@ -43,15 +37,15 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi, u32 len, int advice); int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi, int advice); -long sys_flush_cache(void); +long sys_cacheflush(unsigned long addr, unsigned long len, + unsigned long flags); #ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */ #define sys_mmap sys_mmap #endif #ifndef __tilegx__ /* mm/fault.c */ -long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *); -long _sys_cmpxchg_badaddr(unsigned long address); +long sys_cmpxchg_badaddr(unsigned long address); #endif #ifdef CONFIG_COMPAT @@ -62,14 +56,14 @@ long sys_truncate64(const char __user *path, loff_t length); long sys_ftruncate64(unsigned int fd, loff_t length); #endif +/* Provide versions of standard syscalls that use current_pt_regs(). */ +long sys_rt_sigreturn(void); +#define sys_rt_sigreturn sys_rt_sigreturn + /* These are the intvec*.S trampolines. */ -long _sys_sigaltstack(const stack_t __user *, stack_t __user *); long _sys_rt_sigreturn(void); long _sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid); -long _sys_execve(const char __user *filename, - const char __user *const __user *argv, - const char __user *const __user *envp); #include <asm-generic/syscalls.h> diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h deleted file mode 100644 index 23d1842f483..00000000000 --- a/arch/tile/include/asm/system.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_SYSTEM_H -#define _ASM_TILE_SYSTEM_H - -#ifndef __ASSEMBLY__ - -#include <linux/types.h> -#include <linux/irqflags.h> - -/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */ -#include <asm/ptrace.h> - -#include <arch/chip.h> -#include <arch/sim_def.h> -#include <arch/spr_def.h> - -/* - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. 
- * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ - -#define read_barrier_depends() do { } while (0) - -#define __sync() __insn_mf() - -#if CHIP_HAS_SPLIT_CYCLE() -#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW) -#else -#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ -#endif - -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() -#include <hv/syscall_public.h> -/* - * Issue an uncacheable load to each memory controller, then - * wait until those loads have completed. - */ -static inline void __mb_incoherent(void) -{ - long clobber_r10; - asm volatile("swint2" - : "=R10" (clobber_r10) - : "R10" (HV_SYS_fence_incoherent) - : "r0", "r1", "r2", "r3", "r4", - "r5", "r6", "r7", "r8", "r9", - "r11", "r12", "r13", "r14", - "r15", "r16", "r17", "r18", "r19", - "r20", "r21", "r22", "r23", "r24", - "r25", "r26", "r27", "r28", "r29"); -} -#endif - -/* Fence to guarantee visibility of stores to incoherent memory. 
*/ -static inline void -mb_incoherent(void) -{ - __insn_mf(); - -#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() - { -#if CHIP_HAS_TILE_WRITE_PENDING() - const unsigned long WRITE_TIMEOUT_CYCLES = 400; - unsigned long start = get_cycles_low(); - do { - if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0) - return; - } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES); -#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ - (void) __mb_incoherent(); - } -#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */ -} - -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#define fast_iob() mb_incoherent() - -#define wmb() fast_wmb() -#define rmb() fast_rmb() -#define mb() fast_mb() -#define iob() fast_iob() - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#endif - -#define set_mb(var, value) \ - do { var = value; mb(); } while (0) - -/* - * Pause the DMA engine and static network before task switching. - */ -#define prepare_arch_switch(next) _prepare_arch_switch(next) -void _prepare_arch_switch(struct task_struct *next); - - -/* - * switch_to(n) should switch tasks to task nr n, first - * checking that n isn't the current task, in which case it does nothing. - * The number of callee-saved registers saved on the kernel stack - * is defined here for use in copy_thread() and must agree with __switch_to(). - */ -#endif /* !__ASSEMBLY__ */ -#define CALLEE_SAVED_FIRST_REG 30 -#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */ -#ifndef __ASSEMBLY__ -struct task_struct; -#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next))) -extern struct task_struct *_switch_to(struct task_struct *prev, - struct task_struct *next); - -/* Helper function for _switch_to(). 
*/ -extern struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next, - unsigned long new_system_save_k_0); - -/* Address that switched-away from tasks are at. */ -extern unsigned long get_switch_to_pc(void); - -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible: - * - * TODO: fill this in! - */ -static inline void sched_cacheflush(void) -{ -} - -#define arch_align_stack(x) (x) - -/* - * Is the kernel doing fixups of unaligned accesses? If <0, no kernel - * intervention occurs and SIGBUS is delivered with no data address - * info. If 0, the kernel single-steps the instruction to discover - * the data address to provide with the SIGBUS. If 1, the kernel does - * a fixup. - */ -extern int unaligned_fixup; - -/* Is the kernel printing on each unaligned fixup? */ -extern int unaligned_printk; - -/* Number of unaligned fixups performed */ -extern unsigned int unaligned_fixup_count; - -/* Init-time routine to do tile-specific per-cpu setup. */ -void setup_cpu(int boot); - -/* User-level DMA management functions */ -void grant_dma_mpls(void); -void restrict_dma_mpls(void); - -#ifdef CONFIG_HARDWALL -/* User-level network management functions */ -void reset_network_state(void); -void grant_network_mpls(void); -void restrict_network_mpls(void); -int hardwall_deactivate(struct task_struct *task); - -/* Hook hardwall code into changes in affinity. */ -#define arch_set_cpus_allowed(p, new_mask) do { \ - if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \ - hardwall_deactivate(p); \ -} while (0) -#endif - -/* - * Kernel threads can check to see if they need to migrate their - * stack whenever they return from a context switch; for user - * threads, we defer until they are returning to user-space. 
- */ -#define finish_arch_switch(prev) do { \ - if (unlikely((prev)->state == TASK_DEAD)) \ - __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \ - ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \ - __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \ - (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \ - if (current->mm == NULL && !kstack_hash && \ - current_thread_info()->homecache_cpu != smp_processor_id()) \ - homecache_migrate_kthread(); \ -} while (0) - -/* Support function for forking a new task. */ -void ret_from_fork(void); - -/* Called from ret_from_fork() when a new process starts up. */ -struct task_struct *sim_notify_fork(struct task_struct *prev); - -#endif /* !__ASSEMBLY__ */ - -#endif /* _ASM_TILE_SYSTEM_H */ diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h deleted file mode 100644 index 3935b106de7..00000000000 --- a/arch/tile/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h deleted file mode 100644 index 280d78a9d96..00000000000 --- a/arch/tile/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 3405b52853b..48e4fd0f38e 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -39,6 +39,11 @@ struct thread_info { struct restart_block restart_block; struct single_step_state *step_state; /* single step state (if non-zero) */ + int align_ctl; /* controls unaligned access */ +#ifdef __tilegx__ + unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */ + void __user *unalign_jit_base; /* unalign fixup JIT base */ +#endif }; /* @@ -56,6 +61,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ .step_state = NULL, \ + .align_ctl = 0, \ } #define init_thread_info (init_thread_union.thread_info) @@ -77,40 +83,36 @@ struct thread_info { 
#ifndef __ASSEMBLY__ +void arch_release_thread_info(struct thread_info *info); + /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); #define current_thread_info() \ ((struct thread_info *)(stack_pointer & -THREAD_SIZE)) -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node); -extern void free_thread_info(struct thread_info *info); - /* Sit on a nap instruction until interrupted. */ extern void smp_nap(void); -/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ +/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). */ extern void _cpu_idle(void); -/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ -extern void cpu_idle_on_new_stack(struct thread_info *old_ti, - unsigned long new_sp, - unsigned long new_ss10); - #else /* __ASSEMBLY__ */ -/* how to get the thread information struct from ASM */ +/* + * How to get the thread information struct from assembly. + * Note that we use different macros since different architectures + * have different semantics in their "mm" instruction and we would + * like to guarantee that the macro expands to exactly one instruction. + */ #ifdef __tilegx__ -#define GET_THREAD_INFO(reg) move reg, sp; mm reg, zero, LOG2_THREAD_SIZE, 63 +#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63 #else #define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31 #endif #endif /* !__ASSEMBLY__ */ -#define PREEMPT_ACTIVE 0x10000000 - /* * Thread information flags that various assembly files may need to access. 
* Keep flags accessed frequently in low bits, particular since it makes @@ -125,6 +127,9 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SECCOMP 6 /* secure computing */ #define TIF_MEMDIE 7 /* OOM killer at work */ +#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ +#define TIF_SYSCALL_TRACEPOINT 9 /* syscall tracepoint instrumentation */ +#define TIF_POLLING_NRFLAG 10 /* idle is polling for TIF_NEED_RESCHED */ #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) @@ -134,10 +139,20 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_MEMDIE (1<<TIF_MEMDIE) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) +#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) +#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) /* Work to do on any return to user space. */ #define _TIF_ALLWORK_MASK \ - (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|_TIF_ASYNC_TLB) + (_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\ + _TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME) + +/* Work to do at syscall entry. */ +#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT) + +/* Work to do at syscall exit. */ +#define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT) /* * Thread-synchronous status. 
@@ -149,18 +164,31 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti, #ifdef __tilegx__ #define TS_COMPAT 0x0001 /* 32-bit compatibility mode */ #endif -#define TS_POLLING 0x0004 /* in idle loop but not sleeping */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ -#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) - #ifndef __ASSEMBLY__ #define HAVE_SET_RESTORE_SIGMASK 1 static inline void set_restore_sigmask(void) { struct thread_info *ti = current_thread_info(); ti->status |= TS_RESTORE_SIGMASK; - set_bit(TIF_SIGPENDING, &ti->flags); + WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); +} +static inline void clear_restore_sigmask(void) +{ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; +} +static inline bool test_restore_sigmask(void) +{ + return current_thread_info()->status & TS_RESTORE_SIGMASK; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + struct thread_info *ti = current_thread_info(); + if (!(ti->status & TS_RESTORE_SIGMASK)) + return false; + ti->status &= ~TS_RESTORE_SIGMASK; + return true; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/tile-desc.h b/arch/tile/include/asm/tile-desc.h new file mode 100644 index 00000000000..43849bf79dc --- /dev/null +++ b/arch/tile/include/asm/tile-desc.h @@ -0,0 +1,19 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __tilegx__ +#include <asm/tile-desc_32.h> +#else +#include <asm/tile-desc_64.h> +#endif diff --git a/arch/tile/include/asm/tile-desc_32.h b/arch/tile/include/asm/tile-desc_32.h new file mode 100644 index 00000000000..f09c5c43b0b --- /dev/null +++ b/arch/tile/include/asm/tile-desc_32.h @@ -0,0 +1,553 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef opcode_tilepro_h +#define opcode_tilepro_h + +#include <arch/opcode.h> + + +enum +{ + TILEPRO_MAX_OPERANDS = 5 /* mm */ +}; + +typedef enum +{ + TILEPRO_OPC_BPT, + TILEPRO_OPC_INFO, + TILEPRO_OPC_INFOL, + TILEPRO_OPC_J, + TILEPRO_OPC_JAL, + TILEPRO_OPC_MOVE, + TILEPRO_OPC_MOVE_SN, + TILEPRO_OPC_MOVEI, + TILEPRO_OPC_MOVEI_SN, + TILEPRO_OPC_MOVELI, + TILEPRO_OPC_MOVELI_SN, + TILEPRO_OPC_MOVELIS, + TILEPRO_OPC_PREFETCH, + TILEPRO_OPC_RAISE, + TILEPRO_OPC_ADD, + TILEPRO_OPC_ADD_SN, + TILEPRO_OPC_ADDB, + TILEPRO_OPC_ADDB_SN, + TILEPRO_OPC_ADDBS_U, + TILEPRO_OPC_ADDBS_U_SN, + TILEPRO_OPC_ADDH, + TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADDHS, + TILEPRO_OPC_ADDHS_SN, + TILEPRO_OPC_ADDI, + TILEPRO_OPC_ADDI_SN, + TILEPRO_OPC_ADDIB, + TILEPRO_OPC_ADDIB_SN, + TILEPRO_OPC_ADDIH, + TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDLI, + TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_ADDLIS, + TILEPRO_OPC_ADDS, + TILEPRO_OPC_ADDS_SN, + TILEPRO_OPC_ADIFFB_U, + TILEPRO_OPC_ADIFFB_U_SN, + TILEPRO_OPC_ADIFFH, + TILEPRO_OPC_ADIFFH_SN, + TILEPRO_OPC_AND, + TILEPRO_OPC_AND_SN, + 
TILEPRO_OPC_ANDI, + TILEPRO_OPC_ANDI_SN, + TILEPRO_OPC_AULI, + TILEPRO_OPC_AVGB_U, + TILEPRO_OPC_AVGB_U_SN, + TILEPRO_OPC_AVGH, + TILEPRO_OPC_AVGH_SN, + TILEPRO_OPC_BBNS, + TILEPRO_OPC_BBNS_SN, + TILEPRO_OPC_BBNST, + TILEPRO_OPC_BBNST_SN, + TILEPRO_OPC_BBS, + TILEPRO_OPC_BBS_SN, + TILEPRO_OPC_BBST, + TILEPRO_OPC_BBST_SN, + TILEPRO_OPC_BGEZ, + TILEPRO_OPC_BGEZ_SN, + TILEPRO_OPC_BGEZT, + TILEPRO_OPC_BGEZT_SN, + TILEPRO_OPC_BGZ, + TILEPRO_OPC_BGZ_SN, + TILEPRO_OPC_BGZT, + TILEPRO_OPC_BGZT_SN, + TILEPRO_OPC_BITX, + TILEPRO_OPC_BITX_SN, + TILEPRO_OPC_BLEZ, + TILEPRO_OPC_BLEZ_SN, + TILEPRO_OPC_BLEZT, + TILEPRO_OPC_BLEZT_SN, + TILEPRO_OPC_BLZ, + TILEPRO_OPC_BLZ_SN, + TILEPRO_OPC_BLZT, + TILEPRO_OPC_BLZT_SN, + TILEPRO_OPC_BNZ, + TILEPRO_OPC_BNZ_SN, + TILEPRO_OPC_BNZT, + TILEPRO_OPC_BNZT_SN, + TILEPRO_OPC_BYTEX, + TILEPRO_OPC_BYTEX_SN, + TILEPRO_OPC_BZ, + TILEPRO_OPC_BZ_SN, + TILEPRO_OPC_BZT, + TILEPRO_OPC_BZT_SN, + TILEPRO_OPC_CLZ, + TILEPRO_OPC_CLZ_SN, + TILEPRO_OPC_CRC32_32, + TILEPRO_OPC_CRC32_32_SN, + TILEPRO_OPC_CRC32_8, + TILEPRO_OPC_CRC32_8_SN, + TILEPRO_OPC_CTZ, + TILEPRO_OPC_CTZ_SN, + TILEPRO_OPC_DRAIN, + TILEPRO_OPC_DTLBPR, + TILEPRO_OPC_DWORD_ALIGN, + TILEPRO_OPC_DWORD_ALIGN_SN, + TILEPRO_OPC_FINV, + TILEPRO_OPC_FLUSH, + TILEPRO_OPC_FNOP, + TILEPRO_OPC_ICOH, + TILEPRO_OPC_ILL, + TILEPRO_OPC_INTHB, + TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH, + TILEPRO_OPC_INTHH_SN, + TILEPRO_OPC_INTLB, + TILEPRO_OPC_INTLB_SN, + TILEPRO_OPC_INTLH, + TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_INV, + TILEPRO_OPC_IRET, + TILEPRO_OPC_JALB, + TILEPRO_OPC_JALF, + TILEPRO_OPC_JALR, + TILEPRO_OPC_JALRP, + TILEPRO_OPC_JB, + TILEPRO_OPC_JF, + TILEPRO_OPC_JR, + TILEPRO_OPC_JRP, + TILEPRO_OPC_LB, + TILEPRO_OPC_LB_SN, + TILEPRO_OPC_LB_U, + TILEPRO_OPC_LB_U_SN, + TILEPRO_OPC_LBADD, + TILEPRO_OPC_LBADD_SN, + TILEPRO_OPC_LBADD_U, + TILEPRO_OPC_LBADD_U_SN, + TILEPRO_OPC_LH, + TILEPRO_OPC_LH_SN, + TILEPRO_OPC_LH_U, + TILEPRO_OPC_LH_U_SN, + TILEPRO_OPC_LHADD, + TILEPRO_OPC_LHADD_SN, + TILEPRO_OPC_LHADD_U, 
+ TILEPRO_OPC_LHADD_U_SN, + TILEPRO_OPC_LNK, + TILEPRO_OPC_LNK_SN, + TILEPRO_OPC_LW, + TILEPRO_OPC_LW_SN, + TILEPRO_OPC_LW_NA, + TILEPRO_OPC_LW_NA_SN, + TILEPRO_OPC_LWADD, + TILEPRO_OPC_LWADD_SN, + TILEPRO_OPC_LWADD_NA, + TILEPRO_OPC_LWADD_NA_SN, + TILEPRO_OPC_MAXB_U, + TILEPRO_OPC_MAXB_U_SN, + TILEPRO_OPC_MAXH, + TILEPRO_OPC_MAXH_SN, + TILEPRO_OPC_MAXIB_U, + TILEPRO_OPC_MAXIB_U_SN, + TILEPRO_OPC_MAXIH, + TILEPRO_OPC_MAXIH_SN, + TILEPRO_OPC_MF, + TILEPRO_OPC_MFSPR, + TILEPRO_OPC_MINB_U, + TILEPRO_OPC_MINB_U_SN, + TILEPRO_OPC_MINH, + TILEPRO_OPC_MINH_SN, + TILEPRO_OPC_MINIB_U, + TILEPRO_OPC_MINIB_U_SN, + TILEPRO_OPC_MINIH, + TILEPRO_OPC_MINIH_SN, + TILEPRO_OPC_MM, + TILEPRO_OPC_MNZ, + TILEPRO_OPC_MNZ_SN, + TILEPRO_OPC_MNZB, + TILEPRO_OPC_MNZB_SN, + TILEPRO_OPC_MNZH, + TILEPRO_OPC_MNZH_SN, + TILEPRO_OPC_MTSPR, + TILEPRO_OPC_MULHH_SS, + TILEPRO_OPC_MULHH_SS_SN, + TILEPRO_OPC_MULHH_SU, + TILEPRO_OPC_MULHH_SU_SN, + TILEPRO_OPC_MULHH_UU, + TILEPRO_OPC_MULHH_UU_SN, + TILEPRO_OPC_MULHHA_SS, + TILEPRO_OPC_MULHHA_SS_SN, + TILEPRO_OPC_MULHHA_SU, + TILEPRO_OPC_MULHHA_SU_SN, + TILEPRO_OPC_MULHHA_UU, + TILEPRO_OPC_MULHHA_UU_SN, + TILEPRO_OPC_MULHHSA_UU, + TILEPRO_OPC_MULHHSA_UU_SN, + TILEPRO_OPC_MULHL_SS, + TILEPRO_OPC_MULHL_SS_SN, + TILEPRO_OPC_MULHL_SU, + TILEPRO_OPC_MULHL_SU_SN, + TILEPRO_OPC_MULHL_US, + TILEPRO_OPC_MULHL_US_SN, + TILEPRO_OPC_MULHL_UU, + TILEPRO_OPC_MULHL_UU_SN, + TILEPRO_OPC_MULHLA_SS, + TILEPRO_OPC_MULHLA_SS_SN, + TILEPRO_OPC_MULHLA_SU, + TILEPRO_OPC_MULHLA_SU_SN, + TILEPRO_OPC_MULHLA_US, + TILEPRO_OPC_MULHLA_US_SN, + TILEPRO_OPC_MULHLA_UU, + TILEPRO_OPC_MULHLA_UU_SN, + TILEPRO_OPC_MULHLSA_UU, + TILEPRO_OPC_MULHLSA_UU_SN, + TILEPRO_OPC_MULLL_SS, + TILEPRO_OPC_MULLL_SS_SN, + TILEPRO_OPC_MULLL_SU, + TILEPRO_OPC_MULLL_SU_SN, + TILEPRO_OPC_MULLL_UU, + TILEPRO_OPC_MULLL_UU_SN, + TILEPRO_OPC_MULLLA_SS, + TILEPRO_OPC_MULLLA_SS_SN, + TILEPRO_OPC_MULLLA_SU, + TILEPRO_OPC_MULLLA_SU_SN, + TILEPRO_OPC_MULLLA_UU, + TILEPRO_OPC_MULLLA_UU_SN, + TILEPRO_OPC_MULLLSA_UU, + 
TILEPRO_OPC_MULLLSA_UU_SN, + TILEPRO_OPC_MVNZ, + TILEPRO_OPC_MVNZ_SN, + TILEPRO_OPC_MVZ, + TILEPRO_OPC_MVZ_SN, + TILEPRO_OPC_MZ, + TILEPRO_OPC_MZ_SN, + TILEPRO_OPC_MZB, + TILEPRO_OPC_MZB_SN, + TILEPRO_OPC_MZH, + TILEPRO_OPC_MZH_SN, + TILEPRO_OPC_NAP, + TILEPRO_OPC_NOP, + TILEPRO_OPC_NOR, + TILEPRO_OPC_NOR_SN, + TILEPRO_OPC_OR, + TILEPRO_OPC_OR_SN, + TILEPRO_OPC_ORI, + TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_PACKBS_U, + TILEPRO_OPC_PACKBS_U_SN, + TILEPRO_OPC_PACKHB, + TILEPRO_OPC_PACKHB_SN, + TILEPRO_OPC_PACKHS, + TILEPRO_OPC_PACKHS_SN, + TILEPRO_OPC_PACKLB, + TILEPRO_OPC_PACKLB_SN, + TILEPRO_OPC_PCNT, + TILEPRO_OPC_PCNT_SN, + TILEPRO_OPC_RL, + TILEPRO_OPC_RL_SN, + TILEPRO_OPC_RLI, + TILEPRO_OPC_RLI_SN, + TILEPRO_OPC_S1A, + TILEPRO_OPC_S1A_SN, + TILEPRO_OPC_S2A, + TILEPRO_OPC_S2A_SN, + TILEPRO_OPC_S3A, + TILEPRO_OPC_S3A_SN, + TILEPRO_OPC_SADAB_U, + TILEPRO_OPC_SADAB_U_SN, + TILEPRO_OPC_SADAH, + TILEPRO_OPC_SADAH_SN, + TILEPRO_OPC_SADAH_U, + TILEPRO_OPC_SADAH_U_SN, + TILEPRO_OPC_SADB_U, + TILEPRO_OPC_SADB_U_SN, + TILEPRO_OPC_SADH, + TILEPRO_OPC_SADH_SN, + TILEPRO_OPC_SADH_U, + TILEPRO_OPC_SADH_U_SN, + TILEPRO_OPC_SB, + TILEPRO_OPC_SBADD, + TILEPRO_OPC_SEQ, + TILEPRO_OPC_SEQ_SN, + TILEPRO_OPC_SEQB, + TILEPRO_OPC_SEQB_SN, + TILEPRO_OPC_SEQH, + TILEPRO_OPC_SEQH_SN, + TILEPRO_OPC_SEQI, + TILEPRO_OPC_SEQI_SN, + TILEPRO_OPC_SEQIB, + TILEPRO_OPC_SEQIB_SN, + TILEPRO_OPC_SEQIH, + TILEPRO_OPC_SEQIH_SN, + TILEPRO_OPC_SH, + TILEPRO_OPC_SHADD, + TILEPRO_OPC_SHL, + TILEPRO_OPC_SHL_SN, + TILEPRO_OPC_SHLB, + TILEPRO_OPC_SHLB_SN, + TILEPRO_OPC_SHLH, + TILEPRO_OPC_SHLH_SN, + TILEPRO_OPC_SHLI, + TILEPRO_OPC_SHLI_SN, + TILEPRO_OPC_SHLIB, + TILEPRO_OPC_SHLIB_SN, + TILEPRO_OPC_SHLIH, + TILEPRO_OPC_SHLIH_SN, + TILEPRO_OPC_SHR, + TILEPRO_OPC_SHR_SN, + TILEPRO_OPC_SHRB, + TILEPRO_OPC_SHRB_SN, + TILEPRO_OPC_SHRH, + TILEPRO_OPC_SHRH_SN, + TILEPRO_OPC_SHRI, + TILEPRO_OPC_SHRI_SN, + TILEPRO_OPC_SHRIB, + TILEPRO_OPC_SHRIB_SN, + TILEPRO_OPC_SHRIH, + TILEPRO_OPC_SHRIH_SN, + TILEPRO_OPC_SLT, + 
TILEPRO_OPC_SLT_SN, + TILEPRO_OPC_SLT_U, + TILEPRO_OPC_SLT_U_SN, + TILEPRO_OPC_SLTB, + TILEPRO_OPC_SLTB_SN, + TILEPRO_OPC_SLTB_U, + TILEPRO_OPC_SLTB_U_SN, + TILEPRO_OPC_SLTE, + TILEPRO_OPC_SLTE_SN, + TILEPRO_OPC_SLTE_U, + TILEPRO_OPC_SLTE_U_SN, + TILEPRO_OPC_SLTEB, + TILEPRO_OPC_SLTEB_SN, + TILEPRO_OPC_SLTEB_U, + TILEPRO_OPC_SLTEB_U_SN, + TILEPRO_OPC_SLTEH, + TILEPRO_OPC_SLTEH_SN, + TILEPRO_OPC_SLTEH_U, + TILEPRO_OPC_SLTEH_U_SN, + TILEPRO_OPC_SLTH, + TILEPRO_OPC_SLTH_SN, + TILEPRO_OPC_SLTH_U, + TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLTI, + TILEPRO_OPC_SLTI_SN, + TILEPRO_OPC_SLTI_U, + TILEPRO_OPC_SLTI_U_SN, + TILEPRO_OPC_SLTIB, + TILEPRO_OPC_SLTIB_SN, + TILEPRO_OPC_SLTIB_U, + TILEPRO_OPC_SLTIB_U_SN, + TILEPRO_OPC_SLTIH, + TILEPRO_OPC_SLTIH_SN, + TILEPRO_OPC_SLTIH_U, + TILEPRO_OPC_SLTIH_U_SN, + TILEPRO_OPC_SNE, + TILEPRO_OPC_SNE_SN, + TILEPRO_OPC_SNEB, + TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH, + TILEPRO_OPC_SNEH_SN, + TILEPRO_OPC_SRA, + TILEPRO_OPC_SRA_SN, + TILEPRO_OPC_SRAB, + TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH, + TILEPRO_OPC_SRAH_SN, + TILEPRO_OPC_SRAI, + TILEPRO_OPC_SRAI_SN, + TILEPRO_OPC_SRAIB, + TILEPRO_OPC_SRAIB_SN, + TILEPRO_OPC_SRAIH, + TILEPRO_OPC_SRAIH_SN, + TILEPRO_OPC_SUB, + TILEPRO_OPC_SUB_SN, + TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBBS_U, + TILEPRO_OPC_SUBBS_U_SN, + TILEPRO_OPC_SUBH, + TILEPRO_OPC_SUBH_SN, + TILEPRO_OPC_SUBHS, + TILEPRO_OPC_SUBHS_SN, + TILEPRO_OPC_SUBS, + TILEPRO_OPC_SUBS_SN, + TILEPRO_OPC_SW, + TILEPRO_OPC_SWADD, + TILEPRO_OPC_SWINT0, + TILEPRO_OPC_SWINT1, + TILEPRO_OPC_SWINT2, + TILEPRO_OPC_SWINT3, + TILEPRO_OPC_TBLIDXB0, + TILEPRO_OPC_TBLIDXB0_SN, + TILEPRO_OPC_TBLIDXB1, + TILEPRO_OPC_TBLIDXB1_SN, + TILEPRO_OPC_TBLIDXB2, + TILEPRO_OPC_TBLIDXB2_SN, + TILEPRO_OPC_TBLIDXB3, + TILEPRO_OPC_TBLIDXB3_SN, + TILEPRO_OPC_TNS, + TILEPRO_OPC_TNS_SN, + TILEPRO_OPC_WH64, + TILEPRO_OPC_XOR, + TILEPRO_OPC_XOR_SN, + TILEPRO_OPC_XORI, + TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_NONE +} tilepro_mnemonic; + + + + +typedef enum +{ + 
TILEPRO_PIPELINE_X0, + TILEPRO_PIPELINE_X1, + TILEPRO_PIPELINE_Y0, + TILEPRO_PIPELINE_Y1, + TILEPRO_PIPELINE_Y2, +} tilepro_pipeline; + +#define tilepro_is_x_pipeline(p) ((int)(p) <= (int)TILEPRO_PIPELINE_X1) + +typedef enum +{ + TILEPRO_OP_TYPE_REGISTER, + TILEPRO_OP_TYPE_IMMEDIATE, + TILEPRO_OP_TYPE_ADDRESS, + TILEPRO_OP_TYPE_SPR +} tilepro_operand_type; + +struct tilepro_operand +{ + /* Is this operand a register, immediate or address? */ + tilepro_operand_type type; + + /* The default relocation type for this operand. */ + signed int default_reloc : 16; + + /* How many bits is this value? (used for range checking) */ + unsigned int num_bits : 5; + + /* Is the value signed? (used for range checking) */ + unsigned int is_signed : 1; + + /* Is this operand a source register? */ + unsigned int is_src_reg : 1; + + /* Is this operand written? (i.e. is it a destination register) */ + unsigned int is_dest_reg : 1; + + /* Is this operand PC-relative? */ + unsigned int is_pc_relative : 1; + + /* By how many bits do we right shift the value before inserting? */ + unsigned int rightshift : 2; + + /* Return the bits for this operand to be ORed into an existing bundle. */ + tilepro_bundle_bits (*insert) (int op); + + /* Extract this operand and return it. */ + unsigned int (*extract) (tilepro_bundle_bits bundle); +}; + + +extern const struct tilepro_operand tilepro_operands[]; + +/* One finite-state machine per pipe for rapid instruction decoding. */ +extern const unsigned short * const +tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS]; + + +struct tilepro_opcode +{ + /* The opcode mnemonic, e.g. "add" */ + const char *name; + + /* The enum value for this mnemonic. */ + tilepro_mnemonic mnemonic; + + /* A bit mask of which of the five pipes this instruction + is compatible with: + X0 0x01 + X1 0x02 + Y0 0x04 + Y1 0x08 + Y2 0x10 */ + unsigned char pipes; + + /* How many operands are there? 
*/ + unsigned char num_operands; + + /* Which register does this write implicitly, or TREG_ZERO if none? */ + unsigned char implicitly_written_register; + + /* Can this be bundled with other instructions (almost always true). */ + unsigned char can_bundle; + + /* The description of the operands. Each of these is an + * index into the tilepro_operands[] table. */ + unsigned char operands[TILEPRO_NUM_PIPELINE_ENCODINGS][TILEPRO_MAX_OPERANDS]; + +}; + +extern const struct tilepro_opcode tilepro_opcodes[]; + + +/* Used for non-textual disassembly into structs. */ +struct tilepro_decoded_instruction +{ + const struct tilepro_opcode *opcode; + const struct tilepro_operand *operands[TILEPRO_MAX_OPERANDS]; + int operand_values[TILEPRO_MAX_OPERANDS]; +}; + + +/* Disassemble a bundle into a struct for machine processing. */ +extern int parse_insn_tilepro(tilepro_bundle_bits bits, + unsigned int pc, + struct tilepro_decoded_instruction + decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]); + + +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. + */ +extern const struct tilepro_opcode * +find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe); + + + +#endif /* opcode_tilepro_h */ diff --git a/arch/tile/include/asm/tile-desc_64.h b/arch/tile/include/asm/tile-desc_64.h new file mode 100644 index 00000000000..1819efcba54 --- /dev/null +++ b/arch/tile/include/asm/tile-desc_64.h @@ -0,0 +1,483 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. 
See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef opcode_tile_h +#define opcode_tile_h + +#include <arch/opcode.h> + + +enum +{ + TILEGX_MAX_OPERANDS = 4 /* bfexts */ +}; + +typedef enum +{ + TILEGX_OPC_BPT, + TILEGX_OPC_INFO, + TILEGX_OPC_INFOL, + TILEGX_OPC_MOVE, + TILEGX_OPC_MOVEI, + TILEGX_OPC_MOVELI, + TILEGX_OPC_PREFETCH, + TILEGX_OPC_PREFETCH_ADD_L1, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + TILEGX_OPC_PREFETCH_ADD_L2, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_L1, + TILEGX_OPC_PREFETCH_L1_FAULT, + TILEGX_OPC_PREFETCH_L2, + TILEGX_OPC_PREFETCH_L2_FAULT, + TILEGX_OPC_PREFETCH_L3, + TILEGX_OPC_PREFETCH_L3_FAULT, + TILEGX_OPC_RAISE, + TILEGX_OPC_ADD, + TILEGX_OPC_ADDI, + TILEGX_OPC_ADDLI, + TILEGX_OPC_ADDX, + TILEGX_OPC_ADDXI, + TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXSC, + TILEGX_OPC_AND, + TILEGX_OPC_ANDI, + TILEGX_OPC_BEQZ, + TILEGX_OPC_BEQZT, + TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFINS, + TILEGX_OPC_BGEZ, + TILEGX_OPC_BGEZT, + TILEGX_OPC_BGTZ, + TILEGX_OPC_BGTZT, + TILEGX_OPC_BLBC, + TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBS, + TILEGX_OPC_BLBST, + TILEGX_OPC_BLEZ, + TILEGX_OPC_BLEZT, + TILEGX_OPC_BLTZ, + TILEGX_OPC_BLTZT, + TILEGX_OPC_BNEZ, + TILEGX_OPC_BNEZT, + TILEGX_OPC_CLZ, + TILEGX_OPC_CMOVEQZ, + TILEGX_OPC_CMOVNEZ, + TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPEXCH4, + TILEGX_OPC_CMPLES, + TILEGX_OPC_CMPLEU, + TILEGX_OPC_CMPLTS, + TILEGX_OPC_CMPLTSI, + TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPLTUI, + TILEGX_OPC_CMPNE, + TILEGX_OPC_CMUL, + TILEGX_OPC_CMULA, + TILEGX_OPC_CMULAF, + TILEGX_OPC_CMULF, + TILEGX_OPC_CMULFR, + TILEGX_OPC_CMULH, + TILEGX_OPC_CMULHR, + TILEGX_OPC_CRC32_32, + TILEGX_OPC_CRC32_8, + TILEGX_OPC_CTZ, + TILEGX_OPC_DBLALIGN, + TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + TILEGX_OPC_DRAIN, + TILEGX_OPC_DTLBPR, + TILEGX_OPC_EXCH, + TILEGX_OPC_EXCH4, + 
TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_ADDSUB, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, + TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, + TILEGX_OPC_FDOUBLE_SUB_FLAGS, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, + TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ, + TILEGX_OPC_FETCHADDGEZ4, + TILEGX_OPC_FETCHAND, + TILEGX_OPC_FETCHAND4, + TILEGX_OPC_FETCHOR, + TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FINV, + TILEGX_OPC_FLUSH, + TILEGX_OPC_FLUSHWB, + TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_ADD1, + TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, + TILEGX_OPC_FSINGLE_MUL2, + TILEGX_OPC_FSINGLE_PACK1, + TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, + TILEGX_OPC_ICOH, + TILEGX_OPC_ILL, + TILEGX_OPC_INV, + TILEGX_OPC_IRET, + TILEGX_OPC_J, + TILEGX_OPC_JAL, + TILEGX_OPC_JALR, + TILEGX_OPC_JALRP, + TILEGX_OPC_JR, + TILEGX_OPC_JRP, + TILEGX_OPC_LD, + TILEGX_OPC_LD1S, + TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_LD1U, + TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_LD2S, + TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_LD2U, + TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_LD4S, + TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_LD4U, + TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_LD_ADD, + TILEGX_OPC_LDNA, + TILEGX_OPC_LDNA_ADD, + TILEGX_OPC_LDNT, + TILEGX_OPC_LDNT1S, + TILEGX_OPC_LDNT1S_ADD, + TILEGX_OPC_LDNT1U, + TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S, + TILEGX_OPC_LDNT2S_ADD, + TILEGX_OPC_LDNT2U, + TILEGX_OPC_LDNT2U_ADD, + TILEGX_OPC_LDNT4S, + TILEGX_OPC_LDNT4S_ADD, + TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + TILEGX_OPC_LNK, + TILEGX_OPC_MF, + TILEGX_OPC_MFSPR, + TILEGX_OPC_MM, + TILEGX_OPC_MNZ, + TILEGX_OPC_MTSPR, + TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, + TILEGX_OPC_MUL_HS_LS, + TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, + TILEGX_OPC_MUL_HU_LS, + TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LS_LU, + TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MULA_HS_HS, + TILEGX_OPC_MULA_HS_HU, + TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, + 
TILEGX_OPC_MULA_HU_HU, + TILEGX_OPC_MULA_HU_LS, + TILEGX_OPC_MULA_HU_LU, + TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, + TILEGX_OPC_MULAX, + TILEGX_OPC_MULX, + TILEGX_OPC_MZ, + TILEGX_OPC_NAP, + TILEGX_OPC_NOP, + TILEGX_OPC_NOR, + TILEGX_OPC_OR, + TILEGX_OPC_ORI, + TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + TILEGX_OPC_REVBYTES, + TILEGX_OPC_ROTL, + TILEGX_OPC_ROTLI, + TILEGX_OPC_SHL, + TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADDX, + TILEGX_OPC_SHLI, + TILEGX_OPC_SHLX, + TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRS, + TILEGX_OPC_SHRSI, + TILEGX_OPC_SHRU, + TILEGX_OPC_SHRUI, + TILEGX_OPC_SHRUX, + TILEGX_OPC_SHRUXI, + TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_ST, + TILEGX_OPC_ST1, + TILEGX_OPC_ST1_ADD, + TILEGX_OPC_ST2, + TILEGX_OPC_ST2_ADD, + TILEGX_OPC_ST4, + TILEGX_OPC_ST4_ADD, + TILEGX_OPC_ST_ADD, + TILEGX_OPC_STNT, + TILEGX_OPC_STNT1, + TILEGX_OPC_STNT1_ADD, + TILEGX_OPC_STNT2, + TILEGX_OPC_STNT2_ADD, + TILEGX_OPC_STNT4, + TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + TILEGX_OPC_SUB, + TILEGX_OPC_SUBX, + TILEGX_OPC_SUBXSC, + TILEGX_OPC_SWINT0, + TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, + TILEGX_OPC_SWINT3, + TILEGX_OPC_TBLIDXB0, + TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, + TILEGX_OPC_TBLIDXB3, + TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADDI, + TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADIFFU, + TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTUI, + TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPU, + TILEGX_OPC_V1DDOTPUA, + TILEGX_OPC_V1DDOTPUS, + TILEGX_OPC_V1DDOTPUSA, + TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1DOTPA, + TILEGX_OPC_V1DOTPU, + TILEGX_OPC_V1DOTPUA, + TILEGX_OPC_V1DOTPUS, + TILEGX_OPC_V1DOTPUSA, + TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MAXUI, + TILEGX_OPC_V1MINU, + 
TILEGX_OPC_V1MINUI, + TILEGX_OPC_V1MNZ, + TILEGX_OPC_V1MULTU, + TILEGX_OPC_V1MULU, + TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SADAU, + TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, + TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRS, + TILEGX_OPC_V1SHRSI, + TILEGX_OPC_V1SHRU, + TILEGX_OPC_V1SHRUI, + TILEGX_OPC_V1SUB, + TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V2ADD, + TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADIFFS, + TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPEQI, + TILEGX_OPC_V2CMPLES, + TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTSI, + TILEGX_OPC_V2CMPLTU, + TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTP, + TILEGX_OPC_V2DOTPA, + TILEGX_OPC_V2INT_H, + TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, + TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINS, + TILEGX_OPC_V2MINSI, + TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, + TILEGX_OPC_V2MULS, + TILEGX_OPC_V2MULTS, + TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, + TILEGX_OPC_V2PACKL, + TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, + TILEGX_OPC_V2SADAU, + TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, + TILEGX_OPC_V2SHL, + TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHLSC, + TILEGX_OPC_V2SHRS, + TILEGX_OPC_V2SHRSI, + TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SHRUI, + TILEGX_OPC_V2SUB, + TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V4ADD, + TILEGX_OPC_V4ADDSC, + TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, + TILEGX_OPC_V4PACKSC, + TILEGX_OPC_V4SHL, + TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHRS, + TILEGX_OPC_V4SHRU, + TILEGX_OPC_V4SUB, + TILEGX_OPC_V4SUBSC, + TILEGX_OPC_WH64, + TILEGX_OPC_XOR, + TILEGX_OPC_XORI, + TILEGX_OPC_NONE +} tilegx_mnemonic; + + + +typedef enum +{ + TILEGX_PIPELINE_X0, + TILEGX_PIPELINE_X1, + TILEGX_PIPELINE_Y0, + TILEGX_PIPELINE_Y1, + TILEGX_PIPELINE_Y2, +} tilegx_pipeline; + +#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1) + +typedef enum +{ + TILEGX_OP_TYPE_REGISTER, + TILEGX_OP_TYPE_IMMEDIATE, + TILEGX_OP_TYPE_ADDRESS, + TILEGX_OP_TYPE_SPR +} tilegx_operand_type; + +struct tilegx_operand +{ 
+ /* Is this operand a register, immediate or address? */ + tilegx_operand_type type; + + /* The default relocation type for this operand. */ + signed int default_reloc : 16; + + /* How many bits is this value? (used for range checking) */ + unsigned int num_bits : 5; + + /* Is the value signed? (used for range checking) */ + unsigned int is_signed : 1; + + /* Is this operand a source register? */ + unsigned int is_src_reg : 1; + + /* Is this operand written? (i.e. is it a destination register) */ + unsigned int is_dest_reg : 1; + + /* Is this operand PC-relative? */ + unsigned int is_pc_relative : 1; + + /* By how many bits do we right shift the value before inserting? */ + unsigned int rightshift : 2; + + /* Return the bits for this operand to be ORed into an existing bundle. */ + tilegx_bundle_bits (*insert) (int op); + + /* Extract this operand and return it. */ + unsigned int (*extract) (tilegx_bundle_bits bundle); +}; + + +extern const struct tilegx_operand tilegx_operands[]; + +/* One finite-state machine per pipe for rapid instruction decoding. */ +extern const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS]; + + +struct tilegx_opcode +{ + /* The opcode mnemonic, e.g. "add" */ + const char *name; + + /* The enum value for this mnemonic. */ + tilegx_mnemonic mnemonic; + + /* A bit mask of which of the five pipes this instruction + is compatible with: + X0 0x01 + X1 0x02 + Y0 0x04 + Y1 0x08 + Y2 0x10 */ + unsigned char pipes; + + /* How many operands are there? */ + unsigned char num_operands; + + /* Which register does this write implicitly, or TREG_ZERO if none? */ + unsigned char implicitly_written_register; + + /* Can this be bundled with other instructions (almost always true). */ + unsigned char can_bundle; + + /* The description of the operands. Each of these is an + * index into the tilegx_operands[] table. 
*/ + unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS]; + +}; + +extern const struct tilegx_opcode tilegx_opcodes[]; + +/* Used for non-textual disassembly into structs. */ +struct tilegx_decoded_instruction +{ + const struct tilegx_opcode *opcode; + const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS]; + long long operand_values[TILEGX_MAX_OPERANDS]; +}; + + +/* Disassemble a bundle into a struct for machine processing. */ +extern int parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]); + + + +#endif /* opcode_tilegx_h */ diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h index 29921f0b86d..dc987d53e2a 100644 --- a/arch/tile/include/asm/timex.h +++ b/arch/tile/include/asm/timex.h @@ -29,11 +29,13 @@ typedef unsigned long long cycles_t; #if CHIP_HAS_SPLIT_CYCLE() cycles_t get_cycles(void); +#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW) #else static inline cycles_t get_cycles(void) { return __insn_mfspr(SPR_CYCLE); } +#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ #endif cycles_t get_clock_rate(void); diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h index 96199d214fb..dcf91b25a1e 100644 --- a/arch/tile/include/asm/tlbflush.h +++ b/arch/tile/include/asm/tlbflush.h @@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid); /* The hypervisor tells us what ASIDs are available to us. */ extern int min_asid, max_asid; -static inline unsigned long hv_page_size(const struct vm_area_struct *vma) -{ - return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE; -} - /* Pass as vma pointer for non-executable mapping, if no vma available. */ -#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL) +#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL) /* Flush a single user page on this cpu. 
*/ -static inline void local_flush_tlb_page(const struct vm_area_struct *vma, +static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr, unsigned long page_size) { @@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma, } /* Flush range of user pages on this cpu. */ -static inline void local_flush_tlb_pages(const struct vm_area_struct *vma, +static inline void local_flush_tlb_pages(struct vm_area_struct *vma, unsigned long addr, unsigned long page_size, unsigned long len) @@ -117,10 +112,10 @@ extern void flush_tlb_all(void); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_current_task(void); extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(const struct vm_area_struct *, unsigned long); -extern void flush_tlb_page_mm(const struct vm_area_struct *, +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_page_mm(struct vm_area_struct *, struct mm_struct *, unsigned long); -extern void flush_tlb_range(const struct vm_area_struct *, +extern void flush_tlb_range(struct vm_area_struct *, unsigned long start, unsigned long end); #define flush_tlb() flush_tlb_current_task() diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 343172d422a..93831184423 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -44,27 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node) /* For now, use numa node -1 for global allocation. 
*/ #define pcibus_to_node(bus) ((void)(bus), -1) -/* sched_domains SD_NODE_INIT for TILE architecture */ -#define SD_NODE_INIT (struct sched_domain) { \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 1, \ - .newidle_idx = 2, \ - .wake_idx = 1, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_BALANCE_FORK \ - | SD_WAKE_AFFINE \ - | SD_SERIALIZE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ -} - /* By definition, we create nodes based on online memory. */ #define node_has_online_mem(nid) 1 @@ -77,9 +56,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define topology_core_id(cpu) (cpu) #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) #define topology_thread_cpumask(cpu) cpumask_of(cpu) - -/* indicates that pointers to the topology struct cpumask maps are valid */ -#define arch_provides_topology_pointers yes #endif #endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index d06e35f5720..4b99a1c3aab 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -15,10 +15,15 @@ #ifndef _ASM_TILE_TRAPS_H #define _ASM_TILE_TRAPS_H +#ifndef __ASSEMBLY__ +#include <arch/chip.h> + /* mm/fault.c */ void do_page_fault(struct pt_regs *, int fault_num, unsigned long address, unsigned long write); +#if CHIP_HAS_TILE_DMA() void do_async_page_fault(struct pt_regs *); +#endif #ifndef __tilegx__ /* @@ -60,7 +65,21 @@ void do_breakpoint(struct pt_regs *, int fault_num); #ifdef __tilegx__ +/* kernel/single_step.c */ void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); + +/* Handle unalign data fixup. */ +extern void do_unaligned(struct pt_regs *regs, int vecnum); +#endif + +#endif /* __ASSEMBLY__ */ + +#ifdef __tilegx__ +/* 128 byte JIT per unalign fixup. 
*/ +#define UNALIGN_JIT_SHIFT 7 #endif -#endif /* _ASM_TILE_SYSCALLS_H */ +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h deleted file mode 100644 index b9e79bc580d..00000000000 --- a/arch/tile/include/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/types.h> diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index ef34d2caa5b..b6cde3209b9 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -114,45 +114,79 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs); /* - * We return the __get_user_N function results in a structure, - * thus in r0 and r1. If "err" is zero, "val" is the result - * of the read; otherwise, "err" is -EFAULT. - * - * We rarely need 8-byte values on a 32-bit architecture, but - * we size the structure to accommodate. In practice, for the - * the smaller reads, we can zero the high word for free, and - * the caller will ignore it by virtue of casting anyway. + * Support macros for __get_user(). + * + * Implementation note: The "case 8" logic of casting to the type of + * the result of subtracting the value from itself is basically a way + * of keeping all integer types the same, but casting any pointers to + * ptrdiff_t, i.e. also an integer type. This way there are no + * questionable casts seen by the compiler on an ILP32 platform. + * + * Note that __get_user() and __put_user() assume proper alignment. */ -struct __get_user { - unsigned long long val; - int err; -}; -/* - * FIXME: we should express these as inline extended assembler, since - * they're fundamentally just a variable dereference and some - * supporting exception_table gunk. Note that (a la i386) we can - * extend the copy_to_user and copy_from_user routines to call into - * such extended assembler routines, though we will have to use a - * different return code in that case (1, 2, or 4, rather than -EFAULT). 
- */ -extern struct __get_user __get_user_1(const void __user *); -extern struct __get_user __get_user_2(const void __user *); -extern struct __get_user __get_user_4(const void __user *); -extern struct __get_user __get_user_8(const void __user *); -extern int __put_user_1(long, void __user *); -extern int __put_user_2(long, void __user *); -extern int __put_user_4(long, void __user *); -extern int __put_user_8(long long, void __user *); - -/* Unimplemented routines to cause linker failures */ -extern struct __get_user __get_user_bad(void); -extern int __put_user_bad(void); +#ifdef __LP64__ +#define _ASM_PTR ".quad" +#define _ASM_ALIGN ".align 8" +#else +#define _ASM_PTR ".long" +#define _ASM_ALIGN ".align 4" +#endif + +#define __get_user_asm(OP, x, ptr, ret) \ + asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %1, 0; movei %0, %3 }\n" \ + "j 9f\n" \ + ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (x) \ + : "r" (ptr), "i" (-EFAULT)) + +#ifdef __tilegx__ +#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret) +#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret) +#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret) +#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret) +#else +#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret) +#define __get_user_2(x, ptr, ret) __get_user_asm(lh_u, x, ptr, ret) +#define __get_user_4(x, ptr, ret) __get_user_asm(lw, x, ptr, ret) +#ifdef __LITTLE_ENDIAN +#define __lo32(a, b) a +#define __hi32(a, b) b +#else +#define __lo32(a, b) b +#define __hi32(a, b) a +#endif +#define __get_user_8(x, ptr, ret) \ + ({ \ + unsigned int __a, __b; \ + asm volatile("1: { lw %1, %3; addi %2, %3, 4 }\n" \ + "2: { lw %2, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %1, 0; movei %2, 0 }\n" \ + "{ movei %0, %4; j 9f }\n" \ + 
".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".word 1b, 0b\n" \ + ".word 2b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret), "=r" (__a), "=&r" (__b) \ + : "r" (ptr), "i" (-EFAULT)); \ + (x) = (__typeof(x))(__typeof((x)-(x))) \ + (((u64)__hi32(__a, __b) << 32) | \ + __lo32(__a, __b)); \ + }) +#endif + +extern int __get_user_bad(void) + __attribute__((warning("sizeof __get_user argument not 1, 2, 4 or 8"))); -/* - * Careful: we have to cast the result to the type of the pointer - * for sign reasons. - */ /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -174,30 +208,64 @@ extern int __put_user_bad(void); * function. */ #define __get_user(x, ptr) \ -({ struct __get_user __ret; \ - __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \ - __chk_user_ptr(__gu_addr); \ - switch (sizeof(*(__gu_addr))) { \ - case 1: \ - __ret = __get_user_1(__gu_addr); \ - break; \ - case 2: \ - __ret = __get_user_2(__gu_addr); \ - break; \ - case 4: \ - __ret = __get_user_4(__gu_addr); \ - break; \ - case 8: \ - __ret = __get_user_8(__gu_addr); \ - break; \ - default: \ - __ret = __get_user_bad(); \ - break; \ - } \ - (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \ - __ret.val; \ - __ret.err; \ -}) + ({ \ + int __ret; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: __get_user_1(x, ptr, __ret); break; \ + case 2: __get_user_2(x, ptr, __ret); break; \ + case 4: __get_user_4(x, ptr, __ret); break; \ + case 8: __get_user_8(x, ptr, __ret); break; \ + default: __ret = __get_user_bad(); break; \ + } \ + __ret; \ + }) + +/* Support macros for __put_user(). 
*/ + +#define __put_user_asm(OP, x, ptr, ret) \ + asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %3; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR " 1b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=r" (ret) \ + : "r" (ptr), "r" (x), "i" (-EFAULT)) + +#ifdef __tilegx__ +#define __put_user_1(x, ptr, ret) __put_user_asm(st1, x, ptr, ret) +#define __put_user_2(x, ptr, ret) __put_user_asm(st2, x, ptr, ret) +#define __put_user_4(x, ptr, ret) __put_user_asm(st4, x, ptr, ret) +#define __put_user_8(x, ptr, ret) __put_user_asm(st, x, ptr, ret) +#else +#define __put_user_1(x, ptr, ret) __put_user_asm(sb, x, ptr, ret) +#define __put_user_2(x, ptr, ret) __put_user_asm(sh, x, ptr, ret) +#define __put_user_4(x, ptr, ret) __put_user_asm(sw, x, ptr, ret) +#define __put_user_8(x, ptr, ret) \ + ({ \ + u64 __x = (__typeof((x)-(x)))(x); \ + int __lo = (int) __x, __hi = (int) (__x >> 32); \ + asm volatile("1: { sw %1, %2; addi %0, %1, 4 }\n" \ + "2: { sw %0, %3; movei %0, 0 }\n" \ + ".pushsection .fixup,\"ax\"\n" \ + "0: { movei %0, %4; j 9f }\n" \ + ".section __ex_table,\"a\"\n" \ + ".align 4\n" \ + ".word 1b, 0b\n" \ + ".word 2b, 0b\n" \ + ".popsection\n" \ + "9:" \ + : "=&r" (ret) \ + : "r" (ptr), "r" (__lo32(__lo, __hi)), \ + "r" (__hi32(__lo, __hi)), "i" (-EFAULT)); \ + }) +#endif + +extern int __put_user_bad(void) + __attribute__((warning("sizeof __put_user argument not 1, 2, 4 or 8"))); /** * __put_user: - Write a simple value into user space, with less checking. @@ -217,39 +285,19 @@ extern int __put_user_bad(void); * function. * * Returns zero on success, or -EFAULT on error. - * - * Implementation note: The "case 8" logic of casting to the type of - * the result of subtracting the value from itself is basically a way - * of keeping all integer types the same, but casting any pointers to - * ptrdiff_t, i.e. also an integer type. 
This way there are no - * questionable casts seen by the compiler on an ILP32 platform. */ #define __put_user(x, ptr) \ ({ \ - int __pu_err = 0; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - typeof(*__pu_addr) __pu_val = (x); \ - __chk_user_ptr(__pu_addr); \ - switch (sizeof(__pu_val)) { \ - case 1: \ - __pu_err = __put_user_1((long)__pu_val, __pu_addr); \ - break; \ - case 2: \ - __pu_err = __put_user_2((long)__pu_val, __pu_addr); \ - break; \ - case 4: \ - __pu_err = __put_user_4((long)__pu_val, __pu_addr); \ - break; \ - case 8: \ - __pu_err = \ - __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\ - __pu_addr); \ - break; \ - default: \ - __pu_err = __put_user_bad(); \ - break; \ + int __ret; \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: __put_user_1(x, ptr, __ret); break; \ + case 2: __put_user_2(x, ptr, __ret); break; \ + case 4: __put_user_4(x, ptr, __ret); break; \ + case 8: __put_user_8(x, ptr, __ret); break; \ + default: __ret = __put_user_bad(); break; \ } \ - __pu_err; \ + __ret; \ }) /* @@ -353,7 +401,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } -#ifdef CONFIG_DEBUG_COPY_FROM_USER +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS +/* + * There are still unprovable places in the generic code as of 2.6.34, so this + * option is not really compatible with -Werror, which is more useful in + * general. + */ extern void copy_from_user_overflow(void) __compiletime_warning("copy_from_user() size is not provably correct"); @@ -378,7 +431,7 @@ static inline unsigned long __must_check copy_from_user(void *to, /** * __copy_in_user() - copy data within user space, with less checking. * @to: Destination address, in user space. - * @from: Source address, in kernel space. + * @from: Source address, in user space. * @n: Number of bytes to copy. * * Context: User context only. This function may sleep. 
@@ -395,7 +448,7 @@ extern unsigned long __copy_in_user_inatomic( static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); return __copy_in_user_inatomic(to, from, n); } @@ -520,37 +573,6 @@ static inline unsigned long __must_check flush_user( } /** - * inv_user: - Invalidate a block of memory in user space from cache. - * @mem: Destination address, in user space. - * @len: Number of bytes to invalidate. - * - * Returns number of bytes that could not be invalidated. - * On success, this will be zero. - * - * Note that on Tile64, the "inv" operation is in fact a - * "flush and invalidate", so cache write-backs will occur prior - * to the cache being marked invalid. - */ -extern unsigned long inv_user_asm(void __user *mem, unsigned long len); -static inline unsigned long __must_check __inv_user( - void __user *mem, unsigned long len) -{ - int retval; - - might_fault(); - retval = inv_user_asm(mem, len); - mb_incoherent(); - return retval; -} -static inline unsigned long __must_check inv_user( - void __user *mem, unsigned long len) -{ - if (access_ok(VERIFY_WRITE, mem, len)) - return __inv_user(mem, len); - return len; -} - -/** * finv_user: - Flush-inval a block of memory in user space from cache. * @mem: Destination address, in user space. * @len: Number of bytes to invalidate. 
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30f..00000000000 --- a/arch/tile/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h index 137e2de5b10..5a58a0d1144 100644 --- a/arch/tile/include/asm/unaligned.h +++ b/arch/tile/include/asm/unaligned.h @@ -15,10 +15,29 @@ #ifndef _ASM_TILE_UNALIGNED_H #define _ASM_TILE_UNALIGNED_H -#include <linux/unaligned/le_struct.h> -#include <linux/unaligned/be_byteshift.h> -#include <linux/unaligned/generic.h> -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le +/* + * We could implement faster get_unaligned_[be/le]64 using the ldna + * instruction on tilegx; however, we need to either copy all of the + * other generic functions to here (which is pretty ugly) or else + * modify both the generic code and other arch code to allow arch + * specific unaligned data access functions. Given these functions + * are not often called, we'll stick with the generic version. + */ +#include <asm-generic/unaligned.h> + +/* + * Is the kernel doing fixups of unaligned accesses? If <0, no kernel + * intervention occurs and SIGBUS is delivered with no data address + * info. If 0, the kernel single-steps the instruction to discover + * the data address to provide with the SIGBUS. If 1, the kernel does + * a fixup. + */ +extern int unaligned_fixup; + +/* Is the kernel printing on each unaligned fixup? */ +extern int unaligned_printk; + +/* Number of unaligned fixups performed */ +extern unsigned int unaligned_fixup_count; #endif /* _ASM_TILE_UNALIGNED_H */ diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index b35c2db7119..940831fe9e9 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -11,37 +11,10 @@ * NON INFRINGEMENT. 
See the GNU General Public License for * more details. */ - -#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL) -#define _ASM_TILE_UNISTD_H - -#ifndef __LP64__ -/* Use the flavor of this syscall that matches the 32-bit API better. */ -#define __ARCH_WANT_SYNC_FILE_RANGE2 -#endif - -/* Use the standard ABI for syscalls. */ -#include <asm-generic/unistd.h> - -/* Additional Tilera-specific syscalls. */ -#define __NR_flush_cache (__NR_arch_specific_syscall + 1) -__SYSCALL(__NR_flush_cache, sys_flush_cache) - -#ifndef __tilegx__ -/* "Fast" syscalls provide atomic support for 32-bit chips. */ -#define __NR_FAST_cmpxchg -1 -#define __NR_FAST_atomic_update -2 -#define __NR_FAST_cmpxchg64 -3 -#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0) -__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) -#endif - -#ifdef __KERNEL__ /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */ #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK #endif #define __ARCH_WANT_SYS_NEWFSTATAT -#endif - -#endif /* _ASM_TILE_UNISTD_H */ +#define __ARCH_WANT_SYS_CLONE +#include <uapi/asm/unistd.h> diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h new file mode 100644 index 00000000000..9f6a78d665f --- /dev/null +++ b/arch/tile/include/asm/vdso.h @@ -0,0 +1,49 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __TILE_VDSO_H__ +#define __TILE_VDSO_H__ + +#include <linux/types.h> + +/* + * Note about the vdso_data structure: + * + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the + * structure is supposed to be known only to the function in the vdso + * itself and may change without notice. + */ + +struct vdso_data { + __u64 tz_update_count; /* Timezone atomicity ctr */ + __u64 tb_update_count; /* Timebase atomicity ctr */ + __u64 xtime_tod_stamp; /* TOD clock for xtime */ + __u64 xtime_clock_sec; /* Kernel time second */ + __u64 xtime_clock_nsec; /* Kernel time nanosecond */ + __u64 wtom_clock_sec; /* Wall to monotonic clock second */ + __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */ + __u32 mult; /* Cycle to nanosecond multiplier */ + __u32 shift; /* Cycle to nanosecond divisor (power of two) */ + __u32 tz_minuteswest; /* Minutes west of Greenwich */ + __u32 tz_dsttime; /* Type of dst correction */ +}; + +extern struct vdso_data *vdso_data; + +/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */ +extern void __vdso_rt_sigreturn(void); + +extern int setup_vdso_pages(void); + +#endif /* __TILE_VDSO_H__ */ diff --git a/arch/tile/include/hv/pagesize.h b/arch/tile/include/asm/vga.h index 58bed114fed..7b46e754d61 100644 --- a/arch/tile/include/hv/pagesize.h +++ b/arch/tile/include/asm/vga.h @@ -10,23 +10,30 @@ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for * more details. + * + * Access to VGA videoram. */ -/** - * @file pagesize.h - */ +#ifndef _ASM_TILE_VGA_H +#define _ASM_TILE_VGA_H -#ifndef _HV_PAGESIZE_H -#define _HV_PAGESIZE_H +#include <asm/io.h> -/** The log2 of the size of small pages, in bytes. This value should - * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). - */ -#define HV_LOG2_PAGE_SIZE_SMALL 16 +#define VT_BUF_HAVE_RW -/** The log2 of the size of large pages, in bytes. 
This value should be - * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). - */ -#define HV_LOG2_PAGE_SIZE_LARGE 24 +static inline void scr_writew(u16 val, volatile u16 *addr) +{ + __raw_writew(val, (volatile u16 __iomem *) addr); +} + +static inline u16 scr_readw(volatile const u16 *addr) +{ + return __raw_readw((volatile const u16 __iomem *) addr); +} + +#define vga_readb(a) readb((u8 __iomem *)(a)) +#define vga_writeb(v,a) writeb(v, (u8 __iomem *)(a)) + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(x, s)) -#endif /* _HV_PAGESIZE_H */ +#endif diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h deleted file mode 100644 index c82eb12a5b1..00000000000 --- a/arch/tile/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/tile/include/gxio/common.h b/arch/tile/include/gxio/common.h new file mode 100644 index 00000000000..724595a24d0 --- /dev/null +++ b/arch/tile/include/gxio/common.h @@ -0,0 +1,40 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_COMMON_H_ +#define _GXIO_COMMON_H_ + +/* + * Routines shared between the various GXIO device components. + */ + +#include <hv/iorpc.h> + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/io.h> + +/* Define the standard gxio MMIO functions using kernel functions. 
*/ +#define __gxio_mmio_read8(addr) readb(addr) +#define __gxio_mmio_read16(addr) readw(addr) +#define __gxio_mmio_read32(addr) readl(addr) +#define __gxio_mmio_read64(addr) readq(addr) +#define __gxio_mmio_write8(addr, val) writeb((val), (addr)) +#define __gxio_mmio_write16(addr, val) writew((val), (addr)) +#define __gxio_mmio_write32(addr, val) writel((val), (addr)) +#define __gxio_mmio_write64(addr, val) writeq((val), (addr)) +#define __gxio_mmio_read(addr) __gxio_mmio_read64(addr) +#define __gxio_mmio_write(addr, val) __gxio_mmio_write64((addr), (val)) + +#endif /* !_GXIO_COMMON_H_ */ diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h new file mode 100644 index 00000000000..b9e45e37649 --- /dev/null +++ b/arch/tile/include/gxio/dma_queue.h @@ -0,0 +1,161 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_DMA_QUEUE_H_ +#define _GXIO_DMA_QUEUE_H_ + +/* + * DMA queue management APIs shared between TRIO and mPIPE. + */ + +#include <gxio/common.h> + +/* The credit counter lives in the high 32 bits. */ +#define DMA_QUEUE_CREDIT_SHIFT 32 + +/* + * State object that tracks a DMA queue's head and tail indices, as + * well as the number of commands posted and completed. The + * structure is accessed via a thread-safe, lock-free algorithm. + */ +typedef struct { + /* + * Address of a MPIPE_EDMA_POST_REGION_VAL_t, + * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t + * register. 
These registers have identical encodings and provide + * information about how many commands have been processed. + */ + void *post_region_addr; + + /* + * A lazily-updated count of how many edescs the hardware has + * completed. + */ + uint64_t hw_complete_count __attribute__ ((aligned(64))); + + /* + * High 32 bits are a count of available egress command credits, + * low 24 bits are the next egress "slot". + */ + int64_t credits_and_next_index; + +} __gxio_dma_queue_t; + +/* Initialize a dma queue. */ +extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, + void *post_region_addr, + unsigned int num_entries); + +/* + * Update the "credits_and_next_index" and "hw_complete_count" fields + * based on pending hardware completions. Note that some other thread + * may have already done this and, importantly, may still be in the + * process of updating "credits_and_next_index". + */ +extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue); + +/* Wait for credits to become available. */ +extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, + int64_t modifier); + +/* Reserve slots in the queue, optionally waiting for slots to become + * available, and optionally returning a "completion_slot" suitable for + * direct comparison to "hw_complete_count". + */ +static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, + unsigned int num, bool wait, + bool completion) +{ + uint64_t slot; + + /* + * Try to reserve 'num' egress command slots. We do this by + * constructing a constant that subtracts N credits and adds N to + * the index, and using fetchaddgez to only apply it if the credits + * count doesn't go negative. + */ + int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num; + int64_t old = + __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + + if (unlikely(old + modifier < 0)) { + /* + * We're out of credits.
Try once to get more by checking for + * completed egress commands. If that fails, wait or fail. + */ + __gxio_dma_queue_update_credits(dma_queue); + old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + if (old + modifier < 0) { + if (wait) + old = __gxio_dma_queue_wait_for_credits + (dma_queue, modifier); + else + return GXIO_ERR_DMA_CREDITS; + } + } + + /* The bottom 24 bits of old encode the "slot". */ + slot = (old & 0xffffff); + + if (completion) { + /* + * A "completion_slot" is a "slot" which can be compared to + * "hw_complete_count" at any time in the future. To convert + * "slot" into a "completion_slot", we access "hw_complete_count" + * once (knowing that we have reserved a slot, and thus, it will + * be "basically" accurate), and combine its high 40 bits with + * the 24 bit "slot", and handle "wrapping" by adding "1 << 24" + * if the result is LESS than "hw_complete_count". + */ + uint64_t complete; + complete = ACCESS_ONCE(dma_queue->hw_complete_count); + slot |= (complete & 0xffffffffff000000); + if (slot < complete) + slot += 0x1000000; + } + + /* + * If any of our slots mod 256 were equivalent to 0, go ahead and + * collect some egress credits, and update "hw_complete_count", and + * make sure the index doesn't overflow into the credits. + */ + if (unlikely(((old + num) & 0xff) < num)) { + __gxio_dma_queue_update_credits(dma_queue); + + /* Make sure the index doesn't overflow into the credits. */ +#ifdef __BIG_ENDIAN__ + *(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0; +#else + *(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0; +#endif + } + + return slot; +} + +/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */ +extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, + unsigned int num, int wait); + +/* Check whether a particular "completion slot" has completed. 
+ * + * Note that this function requires a "completion slot", and thus + * cannot be used with the result of any "reserve_fast" function. + */ +extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, + int64_t completion_slot, int update); + +#endif /* !_GXIO_DMA_QUEUE_H_ */ diff --git a/arch/tile/include/gxio/iorpc_globals.h b/arch/tile/include/gxio/iorpc_globals.h new file mode 100644 index 00000000000..52c721f8dad --- /dev/null +++ b/arch/tile/include/gxio/iorpc_globals.h @@ -0,0 +1,38 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#ifndef __IORPC_LINUX_RPC_H__ +#define __IORPC_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define IORPC_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000) +#define IORPC_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001) +#define IORPC_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define IORPC_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int __iorpc_arm_pollfd(int fd, int pollfd_cookie); + +int __iorpc_close_pollfd(int fd, int pollfd_cookie); + +int __iorpc_get_mmio_base(int fd, HV_PTE *base); + +int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size); + +#endif /* !__IORPC_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe.h b/arch/tile/include/gxio/iorpc_mpipe.h new file mode 100644 index 00000000000..4cda03de734 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_mpipe.h @@ -0,0 +1,144 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#ifndef __GXIO_MPIPE_LINUX_RPC_H__ +#define __GXIO_MPIPE_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_mpipe_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/mpipe.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1200) +#define GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1201) + +#define GXIO_MPIPE_OP_ALLOC_NOTIF_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1203) +#define GXIO_MPIPE_OP_INIT_NOTIF_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x1204) +#define GXIO_MPIPE_OP_REQUEST_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1205) +#define GXIO_MPIPE_OP_ENABLE_NOTIF_RING_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1206) +#define GXIO_MPIPE_OP_ALLOC_NOTIF_GROUPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1207) +#define GXIO_MPIPE_OP_INIT_NOTIF_GROUP IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1208) +#define GXIO_MPIPE_OP_ALLOC_BUCKETS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1209) +#define GXIO_MPIPE_OP_INIT_BUCKET IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120a) +#define GXIO_MPIPE_OP_ALLOC_EDMA_RINGS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120b) +#define GXIO_MPIPE_OP_INIT_EDMA_RING_AUX IORPC_OPCODE(IORPC_FORMAT_KERNEL_MEM, 0x120c) + +#define GXIO_MPIPE_OP_COMMIT_RULES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x120f) +#define GXIO_MPIPE_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1210) +#define GXIO_MPIPE_OP_LINK_OPEN_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1211) +#define GXIO_MPIPE_OP_LINK_CLOSE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1212) +#define GXIO_MPIPE_OP_LINK_SET_ATTR_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1213) + +#define GXIO_MPIPE_OP_GET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121e) +#define GXIO_MPIPE_OP_SET_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x121f) +#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1220) +#define 
GXIO_MPIPE_OP_CONFIG_EDMA_RING_BLKS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1221) +#define GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1222) +#define GXIO_MPIPE_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000) +#define GXIO_MPIPE_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001) +#define GXIO_MPIPE_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context, + void *mem_va, size_t mem_size, + unsigned int mem_flags, unsigned int stack, + unsigned int buffer_size_enum); + + +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring); + +int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context, + int inter_x, int inter_y, + int inter_ipi, int inter_event, + unsigned int ring); + +int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context, + unsigned int ring); + +int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, + unsigned int group, + gxio_mpipe_notif_group_bits_t bits); + +int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count, + unsigned int first, unsigned int flags); + +int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket, + MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info); + +int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int 
flags); + +int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va, + size_t mem_size, unsigned int mem_flags, + unsigned int ring, unsigned int channel); + + +int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob, + size_t blob_size); + +int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, + unsigned int iotlb, HV_PTE pte, + unsigned int flags); + +int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context, + _gxio_mpipe_link_name_t name, unsigned int flags); + +int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac); + +int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac, + uint32_t attr, int64_t val); + +int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec, + uint64_t *nsec, uint64_t *cycles); + +int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec, + uint64_t nsec, uint64_t cycles); + +int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, + int64_t nsec); + +int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, + int32_t ppb); + +int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); + +int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie); + +int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base); + +int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_mpipe_info.h b/arch/tile/include/gxio/iorpc_mpipe_info.h new file mode 100644 index 00000000000..f0b04284468 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_mpipe_info.h @@ -0,0 +1,50 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_MPIPE_INFO_LINUX_RPC_H__ +#define __GXIO_MPIPE_INFO_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_mpipe_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/mpipe.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + + +#define GXIO_MPIPE_INFO_OP_INSTANCE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1250) +#define GXIO_MPIPE_INFO_OP_ENUMERATE_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1251) +#define GXIO_MPIPE_INFO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + + +int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context, + _gxio_mpipe_link_name_t name); + +int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context, + unsigned int idx, + _gxio_mpipe_link_name_t *name, + _gxio_mpipe_link_mac_t *mac); + +int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context, + HV_PTE *base); + +int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h new file mode 100644 index 00000000000..376a4f77116 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -0,0 +1,104 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_TRIO_LINUX_RPC_H__ +#define __GXIO_TRIO_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_trio_intf.h> +#include <gxio/trio.h> +#include <gxio/kiorpc.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) + +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) + +#define GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) + +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) + +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) +#define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count, + unsigned int first, unsigned int flags); 
+ + +int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + + +int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context, + unsigned int pio_region, unsigned int mac, + uint32_t bus_address_hi, unsigned int flags); + + +int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context, + unsigned int map, unsigned long va, + uint64_t size, unsigned int asid, + unsigned int mac, uint64_t bus_address, + unsigned int node, + unsigned int order_mode); + +int gxio_trio_get_port_property(gxio_trio_context_t *context, + struct pcie_trio_ports_property *trio_ports); + +int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int intx); + +int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event, + unsigned int mac, unsigned int mem_map, + uint64_t mem_map_base, uint64_t mem_map_limit, + unsigned int asid); + + +int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps, + uint16_t mrs, unsigned int mac); + +int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac); + +int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac); + +int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base); + +int gxio_trio_check_mmio_offset(gxio_trio_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_TRIO_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_uart.h b/arch/tile/include/gxio/iorpc_uart.h new file mode 100644 index 00000000000..55429d48ea5 --- /dev/null +++ 
b/arch/tile/include/gxio/iorpc_uart.h @@ -0,0 +1,40 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_UART_LINUX_RPC_H__ +#define __GXIO_UART_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_uart_intf.h> +#include <gxio/uart.h> +#include <gxio/kiorpc.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_UART_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1900) +#define GXIO_UART_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_UART_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event); + +int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base); + +int gxio_uart_check_mmio_offset(gxio_uart_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_UART_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/iorpc_usb_host.h b/arch/tile/include/gxio/iorpc_usb_host.h new file mode 100644 index 00000000000..79962a97de8 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_usb_host.h @@ -0,0 +1,46 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __GXIO_USB_HOST_LINUX_RPC_H__ +#define __GXIO_USB_HOST_LINUX_RPC_H__ + +#include <hv/iorpc.h> + +#include <hv/drv_usb_host_intf.h> +#include <asm/page.h> +#include <gxio/kiorpc.h> +#include <gxio/usb_host.h> +#include <linux/string.h> +#include <linux/module.h> +#include <asm/pgtable.h> + +#define GXIO_USB_HOST_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1800) +#define GXIO_USB_HOST_OP_REGISTER_CLIENT_MEMORY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1801) +#define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) + +int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x, + int inter_y, int inter_ipi, int inter_event); + +int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context, + HV_PTE pte, unsigned int flags); + +int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, + HV_PTE *base); + +int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context, + unsigned long offset, unsigned long size); + +#endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/kiorpc.h b/arch/tile/include/gxio/kiorpc.h new file mode 100644 index 00000000000..ee5820979ff --- /dev/null +++ b/arch/tile/include/gxio/kiorpc.h @@ -0,0 +1,29 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support routines for kernel IORPC drivers. + */ + +#ifndef _GXIO_KIORPC_H +#define _GXIO_KIORPC_H + +#include <linux/types.h> +#include <asm/page.h> +#include <arch/chip.h> + +#if CHIP_HAS_MMIO() +void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset, + unsigned long size); +#endif + +#endif /* _GXIO_KIORPC_H */ diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h new file mode 100644 index 00000000000..e37cf4f0cff --- /dev/null +++ b/arch/tile/include/gxio/mpipe.h @@ -0,0 +1,1871 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_MPIPE_H_ +#define _GXIO_MPIPE_H_ + +/* + * + * An API for allocating, configuring, and manipulating mPIPE hardware + * resources. 
+ */ + +#include <gxio/common.h> +#include <gxio/dma_queue.h> + +#include <linux/time.h> + +#include <arch/mpipe_def.h> +#include <arch/mpipe_shm.h> + +#include <hv/drv_mpipe_intf.h> +#include <hv/iorpc.h> + +/* + * + * The TILE-Gx mPIPE&tm; shim provides Ethernet connectivity, packet + * classification, and packet load balancing services. The + * gxio_mpipe_ API, declared in <gxio/mpipe.h>, allows applications to + * allocate mPIPE IO channels, configure packet distribution + * parameters, and send and receive Ethernet packets. The API is + * designed to be a minimal wrapper around the mPIPE hardware, making + * system calls only where necessary to preserve inter-process + * protection guarantees. + * + * The APIs described below allow the programmer to allocate and + * configure mPIPE resources. As described below, the mPIPE is a + * single shared hardware device that provides partitionable resources + * that are shared between all applications in the system. The + * gxio_mpipe_ API allows userspace code to make resource request + * calls to the hypervisor, which in turn keeps track of the + * resources in use by all applications, maintains protection + * guarantees, and resets resources upon application shutdown. + * + * We strongly recommend reading the mPIPE section of the IO Device + * Guide (UG404) before working with this API. Most functions in the + * gxio_mpipe_ API are directly analogous to hardware interfaces and + * the documentation assumes that the reader understands those + * hardware interfaces. + * + * @section mpipe__ingress mPIPE Ingress Hardware Resources + * + * The mPIPE ingress hardware provides extensive hardware offload for + * tasks like packet header parsing, load balancing, and memory + * management. This section provides a brief introduction to the + * hardware components and the gxio_mpipe_ calls used to manage them; + * see the IO Device Guide for a much more detailed description of the + * mPIPE's capabilities.
+ * + * When a packet arrives at one of the mPIPE's Ethernet MACs, it is + * assigned a channel number indicating which MAC received it. It + * then proceeds through the following hardware pipeline: + * + * @subsection mpipe__classification Classification + * + * A set of classification processors run header parsing code on each + * incoming packet, extracting information including the destination + * MAC address, VLAN, Ethernet type, and five-tuple hash. Some of + * this information is then used to choose which buffer stack will be + * used to hold the packet, and which bucket will be used by the load + * balancer to determine which application will receive the packet. + * + * The rules by which the buffer stack and bucket are chosen can be + * configured via the @ref gxio_mpipe_classifier API. A given app can + * specify multiple rules, each one specifying a bucket range, and a + * set of buffer stacks, to be used for packets matching the rule. + * Each rule can optionally specify a restricted set of channels, + * VLANs, and/or dMACs, in which it is interested. By default, a + * given rule starts out matching all channels associated with the + * mPIPE context's set of open links; all VLANs; and all dMACs. + * Subsequent restrictions can then be added. + * + * @subsection mpipe__load_balancing Load Balancing + * + * The mPIPE load balancer is responsible for choosing the NotifRing + * to which the packet will be delivered. This decision is based on + * the bucket number indicated by the classification program. In + * general, the bucket number is based on some number of low bits of + * the packet's flow hash (applications that aren't interested in flow + * hashing use a single bucket). Each load balancer bucket keeps a + * record of the NotifRing to which packets directed to that bucket + * are currently being delivered. 
Based on the bucket's load + * balancing mode (@ref gxio_mpipe_bucket_mode_t), the load balancer + * either forwards the packet to the previously assigned NotifRing or + * decides to choose a new NotifRing. If a new NotifRing is required, + * the load balancer chooses the least loaded ring in the NotifGroup + * associated with the bucket. + * + * The load balancer is a shared resource. Each application needs to + * explicitly allocate NotifRings, NotifGroups, and buckets, using + * gxio_mpipe_alloc_notif_rings(), gxio_mpipe_alloc_notif_groups(), + * and gxio_mpipe_alloc_buckets(). Then the application needs to + * configure them using gxio_mpipe_init_notif_ring() and + * gxio_mpipe_init_notif_group_and_buckets(). + * + * @subsection mpipe__buffers Buffer Selection and Packet Delivery + * + * Once the load balancer has chosen the destination NotifRing, the + * mPIPE DMA engine pops at least one buffer off of the 'buffer stack' + * chosen by the classification program and DMAs the packet data into + * that buffer. Each buffer stack provides a hardware-accelerated + * stack of data buffers with the same size. If the packet data is + * larger than the buffers provided by the chosen buffer stack, the + * mPIPE hardware pops off multiple buffers and chains the packet data + * through a multi-buffer linked list. Once the packet data is + * delivered to the buffer(s), the mPIPE hardware writes the + * ::gxio_mpipe_idesc_t metadata object (calculated by the classifier) + * into the NotifRing and increments the number of packets delivered + * to that ring. + * + * Applications can push buffers onto a buffer stack by calling + * gxio_mpipe_push_buffer() or by egressing a packet with the + * ::gxio_mpipe_edesc_t::hwb bit set, indicating that the egressed + * buffers should be returned to the stack. + * + * Applications can allocate and initialize buffer stacks with the + * gxio_mpipe_alloc_buffer_stacks() and gxio_mpipe_init_buffer_stack() + * APIs. 
+ * + * The application must also register the memory pages that will hold + * packets. This requires calling gxio_mpipe_register_page() for each + * memory page that will hold packets allocated by the application for + * a given buffer stack. Since each buffer stack is limited to 16 + * registered pages, it may be necessary to use huge pages, or even + * extremely huge pages, to hold all the buffers. + * + * @subsection mpipe__iqueue NotifRings + * + * Each NotifRing is a region of shared memory, allocated by the + * application, to which the mPIPE delivers packet descriptors + * (::gxio_mpipe_idesc_t). The application can allocate them via + * gxio_mpipe_alloc_notif_rings(). The application can then either + * explicitly initialize them with gxio_mpipe_init_notif_ring() and + * then read from them manually, or can make use of the convenience + * wrappers provided by @ref gxio_mpipe_wrappers. + * + * @section mpipe__egress mPIPE Egress Hardware + * + * Applications use eDMA rings to queue packets for egress. The + * application can allocate them via gxio_mpipe_alloc_edma_rings(). + * The application can then either explicitly initialize them with + * gxio_mpipe_init_edma_ring() and then write to them manually, or + * can make use of the convenience wrappers provided by + * @ref gxio_mpipe_wrappers. + * + * @section gxio__shortcomings Plans for Future API Revisions + * + * The API defined here is only an initial version of the mPIPE API. + * Future plans include: + * + * - Higher level wrapper functions to provide common initialization + * patterns. This should help users start writing mPIPE programs + * without having to learn the details of the hardware. + * + * - Support for reset and deallocation of resources, including + * cleanup upon application shutdown. + * + * - Support for calling these APIs in the BME. + * + * - Support for IO interrupts. + * + * - Clearer definitions of thread safety guarantees. 
+ * + * @section gxio__mpipe_examples Examples + * + * See the following mPIPE example programs for more information about + * allocating mPIPE resources and using them in real applications: + * + * - @ref mpipe/ingress/app.c : Receiving packets. + * + * - @ref mpipe/forward/app.c : Forwarding packets. + * + * Note that there are several more examples. + */ + +/* Flags that can be passed to resource allocation functions. */ +enum gxio_mpipe_alloc_flags_e { + /* Require an allocation to start at a specified resource index. */ + GXIO_MPIPE_ALLOC_FIXED = HV_MPIPE_ALLOC_FIXED, +}; + +/* Flags that can be passed to memory registration functions. */ +enum gxio_mpipe_mem_flags_e { + /* Do not fill L3 when writing, and invalidate lines upon egress. */ + GXIO_MPIPE_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT, + + /* L3 cache fills should only populate IO cache ways. */ + GXIO_MPIPE_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN, +}; + +/* An ingress packet descriptor. When a packet arrives, the mPIPE + * hardware generates this structure and writes it into a NotifRing. + */ +typedef MPIPE_PDESC_t gxio_mpipe_idesc_t; + +/* An egress command descriptor. Applications write this structure + * into eDMA rings and the hardware performs the indicated operation + * (normally involving egressing some bytes). Note that egressing a + * single packet may involve multiple egress command descriptors. + */ +typedef MPIPE_EDMA_DESC_t gxio_mpipe_edesc_t; + +/* + * Max # of mpipe instances. 2 currently. + */ +#define GXIO_MPIPE_INSTANCE_MAX HV_MPIPE_INSTANCE_MAX + +#define NR_MPIPE_MAX GXIO_MPIPE_INSTANCE_MAX + +/* Get the "va" field from an "idesc". + * + * This is the address at which the ingress hardware copied the first + * byte of the packet. + * + * If the classifier detected a custom header, then this will point to + * the custom header, and gxio_mpipe_idesc_get_l2_start() will point + * to the actual L2 header. 
+ * + * Note that this value may be misleading if "idesc->be" is set. + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned char *gxio_mpipe_idesc_get_va(gxio_mpipe_idesc_t *idesc) +{ + return (unsigned char *)(long)idesc->va; +} + +/* Get the "xfer_size" from an "idesc". + * + * This is the actual number of packet bytes transferred into memory + * by the hardware. + * + * Note that this value may be misleading if "idesc->be" is set. + * + * @param idesc An ingress packet descriptor. + * + * ISSUE: Is this the best name for this? + * FIXME: Add more docs about chaining, clipping, etc. + */ +static inline unsigned int gxio_mpipe_idesc_get_xfer_size(gxio_mpipe_idesc_t + *idesc) +{ + return idesc->l2_size; +} + +/* Get the "l2_offset" from an "idesc". + * + * Extremely customized classifiers might not support this function. + * + * This is the number of bytes between the "va" and the L2 header. + * + * The L2 header consists of a destination mac address, a source mac + * address, and an initial ethertype. Various initial ethertypes + * allow encoding extra information in the L2 header, often including + * a vlan, and/or a new ethertype. + * + * Note that the "l2_offset" will be non-zero if (and only if) the + * classifier processed a custom header for the packet. + * + * @param idesc An ingress packet descriptor. + */ +static inline uint8_t gxio_mpipe_idesc_get_l2_offset(gxio_mpipe_idesc_t *idesc) +{ + return (idesc->custom1 >> 32) & 0xFF; +} + +/* Get the "l2_start" from an "idesc". + * + * This is simply gxio_mpipe_idesc_get_va() plus + * gxio_mpipe_idesc_get_l2_offset(). + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned char *gxio_mpipe_idesc_get_l2_start(gxio_mpipe_idesc_t + *idesc) +{ + unsigned char *va = gxio_mpipe_idesc_get_va(idesc); + return va + gxio_mpipe_idesc_get_l2_offset(idesc); +} + +/* Get the "l2_length" from an "idesc". 
+ * + * This is simply gxio_mpipe_idesc_get_xfer_size() minus + * gxio_mpipe_idesc_get_l2_offset(). + * + * @param idesc An ingress packet descriptor. + */ +static inline unsigned int gxio_mpipe_idesc_get_l2_length(gxio_mpipe_idesc_t + *idesc) +{ + unsigned int xfer_size = idesc->l2_size; + return xfer_size - gxio_mpipe_idesc_get_l2_offset(idesc); +} + +/* A context object used to manage mPIPE hardware resources. */ +typedef struct { + + /* File descriptor for calling up to Linux (and thus the HV). */ + int fd; + + /* Corresponding mpipe instance #. */ + int instance; + + /* The VA at which configuration registers are mapped. */ + char *mmio_cfg_base; + + /* The VA at which IDMA, EDMA, and buffer manager are mapped. */ + char *mmio_fast_base; + + /* The "initialized" buffer stacks. */ + gxio_mpipe_rules_stacks_t __stacks; + +} gxio_mpipe_context_t; + +/* This is only used internally, but it's most easily made visible here. */ +typedef gxio_mpipe_context_t gxio_mpipe_info_context_t; + +/* Initialize an mPIPE context. + * + * This function allocates an mPIPE "service domain" and maps the MMIO + * registers into the caller's VA space. + * + * @param context Context object to be initialized. + * @param mpipe_instance Instance number of mPIPE shim to be controlled via + * context. + */ +extern int gxio_mpipe_init(gxio_mpipe_context_t *context, + unsigned int mpipe_instance); + +/* Destroy an mPIPE context. + * + * This function frees the mPIPE "service domain" and unmaps the MMIO + * registers from the caller's VA space. + * + * If a user process exits without calling this routine, the kernel + * will destroy the mPIPE context as part of process teardown. + * + * @param context Context object to be destroyed. 
+ */ +extern int gxio_mpipe_destroy(gxio_mpipe_context_t *context); + +/***************************************************************** + * Buffer Stacks * + ******************************************************************/ + +/* Allocate a set of buffer stacks. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of stacks required. + * @param first Index of first stack if ::GXIO_MPIPE_ALLOC_FIXED flag is set, + * otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer stack, or + * ::GXIO_MPIPE_ERR_NO_BUFFER_STACK if allocation failed. + */ +extern int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context, + unsigned int count, + unsigned int first, + unsigned int flags); + +/* Enum codes for buffer sizes supported by mPIPE. */ +typedef enum { + /* 128 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_128 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_128, + /* 256 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_256 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_256, + /* 512 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_512 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_512, + /* 1024 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_1024 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1024, + /* 1664 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_1664 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_1664, + /* 4096 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_4096 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_4096, + /* 10368 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_10368 = + MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_10368, + /* 16384 byte packet data buffer. */ + GXIO_MPIPE_BUFFER_SIZE_16384 = MPIPE_BSM_INIT_DAT_1__SIZE_VAL_BSZ_16384 +} gxio_mpipe_buffer_size_enum_t; + +/* Convert a buffer size in bytes into a buffer size enum. 
*/ +extern gxio_mpipe_buffer_size_enum_t +gxio_mpipe_buffer_size_to_buffer_size_enum(size_t size); + +/* Convert a buffer size enum into a buffer size in bytes. */ +extern size_t +gxio_mpipe_buffer_size_enum_to_buffer_size(gxio_mpipe_buffer_size_enum_t + buffer_size_enum); + +/* Calculate the number of bytes required to store a given number of + * buffers in the memory registered with a buffer stack via + * gxio_mpipe_init_buffer_stack(). + */ +extern size_t gxio_mpipe_calc_buffer_stack_bytes(unsigned long buffers); + +/* Initialize a buffer stack. This function binds a region of memory + * to be used by the hardware for storing buffer addresses pushed via + * gxio_mpipe_push_buffer() or as the result of sending a buffer out + * the egress with the 'push to stack when done' bit set. Once this + * function returns, the memory region's contents may be arbitrarily + * modified by the hardware at any time and software should not access + * the memory region again. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @param buffer_size_enum The size of each buffer in the buffer stack, + * as an enum. + * @param mem The address of the buffer stack. This memory must be + * physically contiguous and aligned to a 64kB boundary. + * @param mem_size The size of the buffer stack, in bytes. + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * @return Zero on success, ::GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE if + * buffer_size_enum is invalid, ::GXIO_MPIPE_ERR_BAD_BUFFER_STACK if + * stack has not been allocated. + */ +extern int gxio_mpipe_init_buffer_stack(gxio_mpipe_context_t *context, + unsigned int stack, + gxio_mpipe_buffer_size_enum_t + buffer_size_enum, void *mem, + size_t mem_size, + unsigned int mem_flags); + +/* Push a buffer onto a previously initialized buffer stack. + * + * The size of the buffer being pushed must match the size that was + * registered with gxio_mpipe_init_buffer_stack(). 
All packet buffer + * addresses are 128-byte aligned; the low 7 bits of the specified + * buffer address will be ignored. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @param buffer The buffer (the low seven bits are ignored). + */ +static inline void gxio_mpipe_push_buffer(gxio_mpipe_context_t *context, + unsigned int stack, void *buffer) +{ + MPIPE_BSM_REGION_ADDR_t offset = { {0} }; + MPIPE_BSM_REGION_VAL_t val = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_BSM - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.stack = stack; + +#if __SIZEOF_POINTER__ == 4 + val.va = ((ulong) buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT; +#else + val.va = ((long)buffer) >> MPIPE_BSM_REGION_VAL__VA_SHIFT; +#endif + + __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word); +} + +/* Pop a buffer off of a previously initialized buffer stack. + * + * @param context An initialized mPIPE context. + * @param stack The buffer stack index. + * @return The buffer, or NULL if the stack is empty. + */ +static inline void *gxio_mpipe_pop_buffer(gxio_mpipe_context_t *context, + unsigned int stack) +{ + MPIPE_BSM_REGION_ADDR_t offset = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_BSM - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.stack = stack; + + while (1) { + /* + * Case 1: val.c == ..._UNCHAINED, va is non-zero. + * Case 2: val.c == ..._INVALID, va is zero. + * Case 3: val.c == ..._NOT_RDY, va is zero. + */ + MPIPE_BSM_REGION_VAL_t val; + val.word = + __gxio_mmio_read(context->mmio_fast_base + + offset.word); + + /* + * Handle case 1 and 2 by returning the buffer (or NULL). + * Handle case 3 by waiting for the prefetch buffer to refill. 
+ */ + if (val.c != MPIPE_EDMA_DESC_WORD1__C_VAL_NOT_RDY) + return (void *)((unsigned long)val. + va << MPIPE_BSM_REGION_VAL__VA_SHIFT); + } +} + +/***************************************************************** + * NotifRings * + ******************************************************************/ + +/* Allocate a set of NotifRings. + * + * The return value is NOT interesting if count is zero. + * + * Note that NotifRings are allocated in chunks, so allocating one at + * a time is much less efficient than allocating several at once. + * + * @param context An initialized mPIPE context. + * @param count Number of NotifRings required. + * @param first Index of first NotifRing if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated NotifRing, or + * ::GXIO_MPIPE_ERR_NO_NOTIF_RING if allocation failed. + */ +extern int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* Initialize a NotifRing, using the given memory and size. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_idesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 128, 512, + * 2048, or 65536 * sizeof(gxio_mpipe_idesc_t). + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_NOTIF_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_init_notif_ring(gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Configure an interrupt to be sent to a tile on incoming NotifRing + * traffic. Once an interrupt is sent for a particular ring, no more + * will be sent until gxio_mpipe_enable_notif_ring_interrupt() is called.
+ * + * @param context An initialized mPIPE context. + * @param x X coordinate of interrupt target tile. + * @param y Y coordinate of interrupt target tile. + * @param i Index of the IPI register which will receive the interrupt. + * @param e Specific event which will be set in the target IPI register when + * the interrupt occurs. + * @param ring The NotifRing index. + * @return Zero on success, GXIO_ERR_INVAL if params are out of range. + */ +extern int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t + *context, int x, int y, + int i, int e, + unsigned int ring); + +/* Enable an interrupt on incoming NotifRing traffic. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index. + * @return Zero on success, GXIO_ERR_INVAL if params are out of range. + */ +extern int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t + *context, unsigned int ring); + +/* Map all of a client's memory via the given IOTLB. + * @param context An initialized mPIPE context. + * @param iotlb IOTLB index. + * @param pte Page table entry. + * @param flags Flags. + * @return Zero on success, or a negative error code. + */ +extern int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context, + unsigned int iotlb, HV_PTE pte, + unsigned int flags); + +/***************************************************************** + * Notif Groups * + ******************************************************************/ + +/* Allocate a set of NotifGroups. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of NotifGroups required. + * @param first Index of first NotifGroup if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer NotifGroup, or + * ::GXIO_MPIPE_ERR_NO_NOTIF_GROUP if allocation failed. 
+ */ +extern int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context, + unsigned int count, + unsigned int first, + unsigned int flags); + +/* Add a NotifRing to a NotifGroup. This only sets a bit in the + * application's 'group' object; the hardware NotifGroup can be + * initialized by passing 'group' to gxio_mpipe_init_notif_group() or + * gxio_mpipe_init_notif_group_and_buckets(). + */ +static inline void +gxio_mpipe_notif_group_add_ring(gxio_mpipe_notif_group_bits_t *bits, int ring) +{ + bits->ring_mask[ring / 64] |= (1ull << (ring % 64)); +} + +/* Set a particular NotifGroup bitmask. Since the load balancer + * makes decisions based on both bucket and NotifGroup state, most + * applications should use gxio_mpipe_init_notif_group_and_buckets() + * rather than using this function to configure just a NotifGroup. + */ +extern int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context, + unsigned int group, + gxio_mpipe_notif_group_bits_t bits); + +/***************************************************************** + * Load Balancer * + ******************************************************************/ + +/* Allocate a set of load balancer buckets. + * + * The return value is NOT interesting if count is zero. + * + * Note that buckets are allocated in chunks, so allocating one at + * a time is much less efficient than allocating several at once. + * + * Note that the buckets are actually divided into two sub-ranges, of + * different sizes, and different chunk sizes, and the range you get + * by default is determined by the size of the request. Allocations + * cannot span the two sub-ranges. + * + * @param context An initialized mPIPE context. + * @param count Number of buckets required. + * @param first Index of first bucket if ::GXIO_MPIPE_ALLOC_FIXED flag is set, + * otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer bucket, or + * ::GXIO_MPIPE_ERR_NO_BUCKET if allocation failed. 
+ */ +extern int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* The legal modes for gxio_mpipe_bucket_info_t and + * gxio_mpipe_init_notif_group_and_buckets(). + * + * All modes except ::GXIO_MPIPE_BUCKET_ROUND_ROBIN expect that the user + * will allocate a power-of-two number of buckets and initialize them + * to the same mode. The classifier program then uses the appropriate + * number of low bits from the incoming packet's flow hash to choose a + * load balancer bucket. Based on that bucket's load balancing mode, + * reference count, and currently active NotifRing, the load balancer + * chooses the NotifRing to which the packet will be delivered. + */ +typedef enum { + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, in which case packets will be dropped. If + * the bucket reference count ever reaches zero, a new NotifRing may + * be chosen. + */ + GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_DFA, + + /* All packets for a bucket always go to the same NotifRing. + */ + GXIO_MPIPE_BUCKET_STATIC_FLOW_AFFINITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_FIXED, + + /* All packets for a bucket go to the least full NotifRing in the + * group, providing load balancing round robin behavior. + */ + GXIO_MPIPE_BUCKET_ROUND_ROBIN = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_ALWAYS_PICK, + + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, at which point the bucket starts using the + * least full NotifRing in the group. If all NotifRings in the + * group are full, packets will be dropped. + */ + GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY, + + /* All packets for a bucket go to the same NotifRing unless the + * NotifRing gets full, or a random timer fires, at which point the + * bucket starts using the least full NotifRing in the group. 
If + * all NotifRings in the group are full, packets will be dropped. + * WARNING: This mode is BROKEN on chips with fewer than 64 tiles. + */ + GXIO_MPIPE_BUCKET_PREFER_FLOW_LOCALITY = + MPIPE_LBL_INIT_DAT_BSTS_TBL__MODE_VAL_STICKY_RAND, + +} gxio_mpipe_bucket_mode_t; + +/* Copy a set of bucket initialization values into the mPIPE + * hardware. Since the load balancer makes decisions based on both + * bucket and NotifGroup state, most applications should use + * gxio_mpipe_init_notif_group_and_buckets() rather than using this + * function to configure a single bucket. + * + * @param context An initialized mPIPE context. + * @param bucket Bucket index to be initialized. + * @param bucket_info Initial reference count, NotifRing index, and mode. + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET on failure. + */ +extern int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, + unsigned int bucket, + gxio_mpipe_bucket_info_t bucket_info); + +/* Initializes a group and range of buckets and range of rings such + * that the load balancer runs a particular load balancing function. + * + * First, the group is initialized with the given rings. + * + * Second, each bucket is initialized with the mode and group, and a + * ring chosen round-robin from the given rings. + * + * Normally, the classifier picks a bucket, and then the load balancer + * picks a ring, based on the bucket's mode, group, and current ring, + * possibly updating the bucket's ring. + * + * @param context An initialized mPIPE context. + * @param group The group. + * @param ring The first ring. + * @param num_rings The number of rings. + * @param bucket The first bucket. + * @param num_buckets The number of buckets. + * @param mode The load balancing mode. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_BUCKET, + * ::GXIO_MPIPE_ERR_BAD_NOTIF_GROUP, or + * ::GXIO_MPIPE_ERR_BAD_NOTIF_RING on failure. 
+ */ +extern int gxio_mpipe_init_notif_group_and_buckets(gxio_mpipe_context_t + *context, + unsigned int group, + unsigned int ring, + unsigned int num_rings, + unsigned int bucket, + unsigned int num_buckets, + gxio_mpipe_bucket_mode_t + mode); + +/* Return credits to a NotifRing and/or bucket. + * + * @param context An initialized mPIPE context. + * @param ring The NotifRing index, or -1. + * @param bucket The bucket, or -1. + * @param count The number of credits to return. + */ +static inline void gxio_mpipe_credit(gxio_mpipe_context_t *context, + int ring, int bucket, unsigned int count) +{ + /* NOTE: Fancy struct initialization would break "C89" header test. */ + + MPIPE_IDMA_RELEASE_REGION_ADDR_t offset = { {0} }; + MPIPE_IDMA_RELEASE_REGION_VAL_t val = { {0} }; + + /* + * The mmio_fast_base region starts at the IDMA region, so subtract + * off that initial offset. + */ + offset.region = + MPIPE_MMIO_ADDR__REGION_VAL_IDMA - + MPIPE_MMIO_ADDR__REGION_VAL_IDMA; + offset.ring = ring; + offset.bucket = bucket; + offset.ring_enable = (ring >= 0); + offset.bucket_enable = (bucket >= 0); + val.count = count; + + __gxio_mmio_write(context->mmio_fast_base + offset.word, val.word); +} + +/***************************************************************** + * Egress Rings * + ******************************************************************/ + +/* Allocate a set of eDMA rings. + * + * The return value is NOT interesting if count is zero. + * + * @param context An initialized mPIPE context. + * @param count Number of eDMA rings required. + * @param first Index of first eDMA ring if ::GXIO_MPIPE_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits from ::gxio_mpipe_alloc_flags_e. + * @return Index of first allocated buffer eDMA ring, or + * ::GXIO_MPIPE_ERR_NO_EDMA_RING if allocation failed. 
+ */ +extern int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +/* Initialize an eDMA ring, using the given memory and size. + * + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param channel The channel to use. This must be one of the channels + * associated with the context's set of open links. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_edesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 512, 2048, + * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)). + * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_init_edma_ring(gxio_mpipe_context_t *context, + unsigned int ering, unsigned int channel, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Set the "max_blks", "min_snf_blks", and "db" fields of + * ::MPIPE_EDMA_RG_INIT_DAT_THRESH_t for a given edma ring. + * + * The global pool of dynamic blocks will be automatically adjusted. + * + * This function should not be called after any egress has been done + * on the edma ring. + * + * Most applications should just use gxio_mpipe_equeue_set_snf_size(). + * + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param max_blks The number of blocks to dedicate to the ring + * (normally min_snf_blks + 1). Must be greater than min_snf_blks. + * @param min_snf_blks The number of blocks which must be stored + * prior to starting to send the packet (normally 12). + * @param db Whether to allow use of dynamic blocks by the ring + * (normally 1). + * + * @return 0 on success, negative on error.
+ */ +extern int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context, + unsigned int ering, + unsigned int max_blks, + unsigned int min_snf_blks, + unsigned int db); + +/***************************************************************** + * Classifier Program * + ******************************************************************/ + +/* + * + * Functions for loading or configuring the mPIPE classifier program. + * + * The mPIPE classification processors all run a special "classifier" + * program which, for each incoming packet, parses the packet headers, + * encodes some packet metadata in the "idesc", and either drops the + * packet, or picks a notif ring to handle the packet, and a buffer + * stack to contain the packet, usually based on the channel, VLAN, + * dMAC, flow hash, and packet size, under the guidance of the "rules" + * API described below. + * + * @section gxio_mpipe_classifier_default Default Classifier + * + * The MDE provides a simple "default" classifier program. It is + * shipped as source in "$TILERA_ROOT/src/sys/mpipe/classifier.c", + * which serves as its official documentation. It is shipped as a + * binary program in "$TILERA_ROOT/tile/boot/classifier", which is + * automatically included in bootroms created by "tile-monitor", and + * is automatically loaded by the hypervisor at boot time. + * + * The L2 analysis handles LLC packets, SNAP packets, and "VLAN + * wrappers" (keeping the outer VLAN). + * + * The L3 analysis handles IPv4 and IPv6, dropping packets with bad + * IPv4 header checksums, requesting computation of a TCP/UDP checksum + * if appropriate, and hashing the dest and src IP addresses, plus the + * ports for TCP/UDP packets, into the flow hash. No special analysis + * is done for "fragmented" packets or "tunneling" protocols. Thus, + * the first fragment of a fragmented TCP/UDP packet is hashed using + * src/dest IP address and ports and all subsequent fragments are only + * hashed according to src/dest IP address. 
+ * + * The L3 analysis handles other packets too, hashing the dMAC and + * sMAC into a flow hash. + * + * The channel, VLAN, and dMAC are used to pick a "rule" (see the + * "rules" APIs below), which in turn is used to pick a buffer stack + * (based on the packet size) and a bucket (based on the flow hash). + * + * To receive traffic matching a particular channel/VLAN/dMAC + * pattern, an application should allocate its own buffer stacks and + * load balancer buckets, and map traffic to those stacks and buckets, + * as described by the "rules" API below. + * + * Various packet metadata is encoded in the idesc. The flow hash is + * four bytes at 0x0C. The VLAN is two bytes at 0x10. The ethtype is + * two bytes at 0x12. The l3 start is one byte at 0x14. The l4 start + * is one byte at 0x15 for IPv4 and IPv6 packets, and otherwise zero. + * The protocol is one byte at 0x16 for IPv4 and IPv6 packets, and + * otherwise zero. + * + * @section gxio_mpipe_classifier_custom Custom Classifiers. + * + * A custom classifier may be created using "tile-mpipe-cc" with a + * customized version of the default classifier sources. + * + * The custom classifier may be included in bootroms using the + * "--classifier" option to "tile-monitor", or loaded dynamically + * using gxio_mpipe_classifier_load_from_file(). + * + * Be aware that "extreme" customizations may break the assumptions of + * the "rules" APIs described below, but simple customizations, such + * as adding new packet metadata, should be fine. + */ + +/* A set of classifier rules, plus a context. */ +typedef struct { + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The actual rules. */ + gxio_mpipe_rules_list_t list; + +} gxio_mpipe_rules_t; + +/* Initialize a classifier program rules list. + * + * This function can be called on a previously initialized rules list + * to discard any previously added rules. + * + * @param rules Rules list to initialize. + * @param context An initialized mPIPE context.
+ */ +extern void gxio_mpipe_rules_init(gxio_mpipe_rules_t *rules, + gxio_mpipe_context_t *context); + +/* Begin a new rule on the indicated rules list. + * + * Note that an empty rule matches all packets, but an empty rule list + * matches no packets. + * + * @param rules Rules list to which new rule is appended. + * @param bucket First load balancer bucket to which packets will be + * delivered. + * @param num_buckets Number of buckets (must be a power of two) across + * which packets will be distributed based on the "flow hash". + * @param stacks Either NULL, to assign each packet to the smallest + * initialized buffer stack which does not induce chaining (and to + * drop packets which exceed the largest initialized buffer stack + * buffer size), or an array, with each entry indicating which buffer + * stack should be used for packets up to that size (with 255 + * indicating that those packets should be dropped). + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_begin(gxio_mpipe_rules_t *rules, + unsigned int bucket, + unsigned int num_buckets, + gxio_mpipe_rules_stacks_t *stacks); + +/* Set the headroom of the current rule. + * + * @param rules Rules list whose current rule will be modified. + * @param headroom The headroom. + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_set_headroom(gxio_mpipe_rules_t *rules, + uint8_t headroom); + +/* Indicate that packets from a particular channel can be delivered + * to the buckets and buffer stacks associated with the current rule. + * + * Channels added must be associated with links opened by the mPIPE context + * used in gxio_mpipe_rules_init(). A rule with no channels is equivalent + * to a rule naming all such associated channels. + * + * @param rules Rules list whose current rule will be modified. + * @param channel The channel to add. + * @return 0 on success, or a negative error code on failure. 
+ */ +extern int gxio_mpipe_rules_add_channel(gxio_mpipe_rules_t *rules, + unsigned int channel); + +/* Commit rules. + * + * The rules are sent to the hypervisor, where they are combined with + * the rules from other apps, and used to program the hardware classifier. + * + * Note that if this function returns an error, then the rules will NOT + * have been committed, even if the error is due to interactions with + * rules from another app. + * + * @param rules Rules list to commit. + * @return 0 on success, or a negative error code on failure. + */ +extern int gxio_mpipe_rules_commit(gxio_mpipe_rules_t *rules); + +/***************************************************************** + * Ingress Queue Wrapper * + ******************************************************************/ + +/* + * + * Convenience functions for receiving packets from a NotifRing and + * sending packets via an eDMA ring. + * + * The mpipe ingress and egress hardware uses shared memory packet + * descriptors to describe packets that have arrived on ingress or + * are destined for egress. These descriptors are stored in shared + * memory ring buffers and written or read by hardware as necessary. + * The gxio library provides wrapper functions that manage the head and + * tail pointers for these rings, allowing the user to easily read or + * write packet descriptors. + * + * The initialization interface for ingress and egress rings is quite + * similar. For example, to create an ingress queue, the user passes + * a ::gxio_mpipe_iqueue_t state object, a ring number from + * gxio_mpipe_alloc_notif_rings(), and the address of memory to hold a + * ring buffer to the gxio_mpipe_iqueue_init() function. The function + * returns success when the state object has been initialized and the + * hardware configured to deliver packets to the specified ring + * buffer. 
Similarly, gxio_mpipe_equeue_init() takes a + * ::gxio_mpipe_equeue_t state object, a ring number from + * gxio_mpipe_alloc_edma_rings(), and a shared memory buffer. + * + * @section gxio_mpipe_iqueue Working with Ingress Queues + * + * Once initialized, the gxio_mpipe_iqueue_t API provides two flows + * for getting the ::gxio_mpipe_idesc_t packet descriptor associated + * with incoming packets. The simplest is to call + * gxio_mpipe_iqueue_get() or gxio_mpipe_iqueue_try_get(). These + * functions copy the oldest packet descriptor out of the NotifRing and + * into a descriptor provided by the caller. They also immediately + * inform the hardware that a descriptor has been processed. + * + * For applications with stringent performance requirements, higher + * efficiency can be achieved by avoiding the packet descriptor copy + * and processing multiple descriptors at once. The + * gxio_mpipe_iqueue_peek() and gxio_mpipe_iqueue_try_peek() functions + * allow such optimizations. These functions provide a pointer to the + * next valid ingress descriptor in the NotifRing's shared memory ring + * buffer, and a count of how many contiguous descriptors are ready to + * be processed. The application can then process any number of those + * descriptors in place, calling gxio_mpipe_iqueue_consume() to inform + * the hardware after each one has been processed. + * + * @section gxio_mpipe_equeue Working with Egress Queues + * + * Similarly, the egress queue API provides a high-performance + * interface plus a simple wrapper for use in posting + * ::gxio_mpipe_edesc_t egress packet descriptors. The simple + * version, gxio_mpipe_equeue_put(), allows the programmer to wait for + * an eDMA ring slot to become available and write a single descriptor + * into the ring. + * + * Alternatively, you can reserve slots in the eDMA ring using + * gxio_mpipe_equeue_reserve() or gxio_mpipe_equeue_try_reserve(), and + * then fill in each slot using gxio_mpipe_equeue_put_at(). 
This + * capability can be used to amortize the cost of reserving slots + * across several packets. It also allows gather operations to be + * performed on a shared equeue, by ensuring that the edescs for all + * the fragments are all contiguous in the eDMA ring. + * + * The gxio_mpipe_equeue_reserve() and gxio_mpipe_equeue_try_reserve() + * functions return a 63-bit "completion slot", which is actually a + * sequence number, the low bits of which indicate the ring buffer + * index and the high bits the number of times the application has + * gone around the egress ring buffer. The extra bits allow an + * application to check for egress completion by calling + * gxio_mpipe_equeue_is_complete() to see whether a particular 'slot' + * number has finished. Given the maximum packet rates of the Gx + * processor, the 63-bit slot number will never wrap. + * + * In practice, most applications use the ::gxio_mpipe_edesc_t::hwb + * bit to indicate that the buffers containing egress packet data + * should be pushed onto a buffer stack when egress is complete. Such + * applications generally do not need to know when an egress operation + * completes (since there is no need to free a buffer post-egress), + * and thus can use the optimized gxio_mpipe_equeue_reserve_fast() or + * gxio_mpipe_equeue_try_reserve_fast() functions, which return a 24 + * bit "slot", instead of a 63-bit "completion slot". + * + * Once a slot has been "reserved", it MUST be filled. If the + * application reserves a slot and then decides that it does not + * actually need it, it can set the ::gxio_mpipe_edesc_t::ns (no send) + * bit on the descriptor passed to gxio_mpipe_equeue_put_at() to + * indicate that no data should be sent. This technique can also be + * used to drop an incoming packet, instead of forwarding it, since + * any buffer will still be pushed onto the buffer stack when the + * egress descriptor is processed. + */ + +/* A convenient interface to a NotifRing, for use by a single thread. 
+ */ +typedef struct { + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The actual NotifRing. */ + gxio_mpipe_idesc_t *idescs; + + /* The number of entries. */ + unsigned long num_entries; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned long log2_num_entries; + + /* The next entry. */ + unsigned int head; + + /* The NotifRing id. */ + unsigned int ring; + +#ifdef __BIG_ENDIAN__ + /* The number of byteswapped entries. */ + unsigned int swapped; +#endif + +} gxio_mpipe_iqueue_t; + +/* Initialize an "iqueue". + * + * Takes the iqueue plus the same args as gxio_mpipe_init_notif_ring(). + */ +extern int gxio_mpipe_iqueue_init(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_context_t *context, + unsigned int ring, + void *mem, size_t mem_size, + unsigned int mem_flags); + +/* Advance over some old entries in an iqueue. + * + * Please see the documentation for gxio_mpipe_iqueue_consume(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param count The number of entries to advance over. + */ +static inline void gxio_mpipe_iqueue_advance(gxio_mpipe_iqueue_t *iqueue, + int count) +{ + /* Advance with proper wrap. */ + int head = iqueue->head + count; + iqueue->head = + (head & iqueue->mask_num_entries) + + (head >> iqueue->log2_num_entries); + +#ifdef __BIG_ENDIAN__ + /* HACK: Track swapped entries. */ + iqueue->swapped -= count; +#endif +} + +/* Release the ring and bucket for an old entry in an iqueue. + * + * Releasing the ring allows more packets to be delivered to the ring. + * + * Releasing the bucket allows flows using the bucket to be moved to a + * new ring when using GXIO_MPIPE_BUCKET_DYNAMIC_FLOW_AFFINITY. 
+ * + * This function is shorthand for "gxio_mpipe_credit(iqueue->context, + * iqueue->ring, idesc->bucket_id, 1)", and it may be more convenient + * to make that underlying call, using those values, instead of + * tracking the entire "idesc". + * + * If packet processing is deferred, optimal performance requires that + * the releasing be deferred as well. + * + * Please see the documentation for gxio_mpipe_iqueue_consume(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc The descriptor which was processed. + */ +static inline void gxio_mpipe_iqueue_release(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + gxio_mpipe_credit(iqueue->context, iqueue->ring, idesc->bucket_id, 1); +} + +/* Consume a packet from an "iqueue". + * + * After processing packets peeked at via gxio_mpipe_iqueue_peek() + * or gxio_mpipe_iqueue_try_peek(), you must call this function, or + * gxio_mpipe_iqueue_advance() plus gxio_mpipe_iqueue_release(), to + * advance over those entries, and release their rings and buckets. + * + * You may call this function as each packet is processed, or you can + * wait until several packets have been processed. + * + * Note that if you are using a single bucket, and you are handling + * batches of N packets, then you can replace several calls to this + * function with calls to "gxio_mpipe_iqueue_advance(iqueue, N)" and + * "gxio_mpipe_credit(iqueue->context, iqueue->ring, bucket, N)". + * + * Note that if your classifier sets "idesc->nr", then you should + * explicitly call "gxio_mpipe_iqueue_advance(iqueue, 1)" plus + * "gxio_mpipe_credit(iqueue->context, iqueue->ring, -1, 1)", to + * avoid incorrectly crediting the (unused) bucket. + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc The descriptor which was processed.
+ */ +static inline void gxio_mpipe_iqueue_consume(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + gxio_mpipe_iqueue_advance(iqueue, 1); + gxio_mpipe_iqueue_release(iqueue, idesc); +} + +/* Peek at the next packet(s) in an "iqueue", without waiting. + * + * If no packets are available, fills idesc_ref with NULL, and then + * returns ::GXIO_MPIPE_ERR_IQUEUE_EMPTY. Otherwise, fills idesc_ref + * with the address of the next valid packet descriptor, and returns + * the maximum number of valid descriptors which can be processed. + * You may process fewer descriptors if desired. + * + * Call gxio_mpipe_iqueue_consume() on each packet once it has been + * processed (or dropped), to allow more packets to be delivered. + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc_ref A pointer to a packet descriptor pointer. + * @return The (positive) number of packets which can be processed, + * or ::GXIO_MPIPE_ERR_IQUEUE_EMPTY if no packets are available. + */ +static inline int gxio_mpipe_iqueue_try_peek(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t **idesc_ref) +{ + gxio_mpipe_idesc_t *next; + + uint64_t head = iqueue->head; + uint64_t tail = __gxio_mmio_read(iqueue->idescs); + + /* Available entries. */ + uint64_t avail = + (tail >= head) ? (tail - head) : (iqueue->num_entries - head); + + if (avail == 0) { + *idesc_ref = NULL; + return GXIO_MPIPE_ERR_IQUEUE_EMPTY; + } + + next = &iqueue->idescs[head]; + + /* ISSUE: Is this helpful? */ + __insn_prefetch(next); + +#ifdef __BIG_ENDIAN__ + /* HACK: Swap new entries directly in memory. */ + { + int i, j; + for (i = iqueue->swapped; i < avail; i++) { + for (j = 0; j < 8; j++) + next[i].words[j] = + __builtin_bswap64(next[i].words[j]); + } + iqueue->swapped = avail; + } +#endif + + *idesc_ref = next; + + return avail; +} + +/* Drop a packet by pushing its buffer (if appropriate). 
+ * + * NOTE: The caller must still call gxio_mpipe_iqueue_consume() if idesc + * came from gxio_mpipe_iqueue_try_peek() or gxio_mpipe_iqueue_peek(). + * + * @param iqueue An ingress queue initialized via gxio_mpipe_iqueue_init(). + * @param idesc A packet descriptor. + */ +static inline void gxio_mpipe_iqueue_drop(gxio_mpipe_iqueue_t *iqueue, + gxio_mpipe_idesc_t *idesc) +{ + /* FIXME: Handle "chaining" properly. */ + + if (!idesc->be) { + unsigned char *va = gxio_mpipe_idesc_get_va(idesc); + gxio_mpipe_push_buffer(iqueue->context, idesc->stack_idx, va); + } +} + +/***************************************************************** + * Egress Queue Wrapper * + ******************************************************************/ + +/* A convenient, thread-safe interface to an eDMA ring. */ +typedef struct { + + /* State object for tracking head and tail pointers. */ + __gxio_dma_queue_t dma_queue; + + /* The ring entries. */ + gxio_mpipe_edesc_t *edescs; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned long log2_num_entries; + + /* The context. */ + gxio_mpipe_context_t *context; + + /* The ering. */ + unsigned int ering; + + /* The channel. */ + unsigned int channel; + +} gxio_mpipe_equeue_t; + +/* Initialize an "equeue". + * + * This function uses gxio_mpipe_init_edma_ring() to initialize the + * underlying edma_ring using the provided arguments. + * + * @param equeue An egress queue to be initialized. + * @param context An initialized mPIPE context. + * @param ering The eDMA ring index. + * @param channel The channel to use. This must be one of the channels + * associated with the context's set of open links. + * @param mem A physically contiguous region of memory to be filled + * with a ring of ::gxio_mpipe_edesc_t structures. + * @param mem_size Number of bytes in the ring. Must be 512, 2048, + * 8192 or 65536, times 16 (i.e. sizeof(gxio_mpipe_edesc_t)). 
+ * @param mem_flags ::gxio_mpipe_mem_flags_e memory flags. + * + * @return 0 on success, ::GXIO_MPIPE_ERR_BAD_EDMA_RING or + * ::GXIO_ERR_INVAL_MEMORY_SIZE on failure. + */ +extern int gxio_mpipe_equeue_init(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_context_t *context, + unsigned int ering, + unsigned int channel, + void *mem, unsigned int mem_size, + unsigned int mem_flags); + +/* Reserve completion slots for edescs. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is slower than gxio_mpipe_equeue_reserve_fast(), but + * returns a full 64 bit completion slot, which can be used with + * gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (must be non-zero). + * @return The first reserved completion slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_reserve(gxio_mpipe_equeue_t *equeue, + unsigned int num) +{ + return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, true); +} + +/* Reserve completion slots for edescs, if possible. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is slower than gxio_mpipe_equeue_try_reserve_fast(), + * but returns a full 64 bit completion slot, which can be used with + * gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (must be non-zero). + * @return The first reserved completion slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_try_reserve(gxio_mpipe_equeue_t + *equeue, unsigned int num) +{ + return __gxio_dma_queue_reserve_aux(&equeue->dma_queue, num, false); +} + +/* Reserve slots for edescs. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. 
+ * + * This function is faster than gxio_mpipe_equeue_reserve(), but + * returns a 24 bit slot (instead of a 64 bit completion slot), which + * thus cannot be used with gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (should be non-zero). + * @return The first reserved slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_reserve_fast(gxio_mpipe_equeue_t + *equeue, unsigned int num) +{ + return __gxio_dma_queue_reserve(&equeue->dma_queue, num, true, false); +} + +/* Reserve slots for edescs, if possible. + * + * Use gxio_mpipe_equeue_put_at() to actually populate the slots. + * + * This function is faster than gxio_mpipe_equeue_try_reserve(), but + * returns a 24 bit slot (instead of a 64 bit completion slot), which + * thus cannot be used with gxio_mpipe_equeue_is_complete(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param num Number of slots to reserve (should be non-zero). + * @return The first reserved slot, or a negative error code. + */ +static inline int64_t gxio_mpipe_equeue_try_reserve_fast(gxio_mpipe_equeue_t + *equeue, + unsigned int num) +{ + return __gxio_dma_queue_reserve(&equeue->dma_queue, num, false, false); +} + +/* + * HACK: This helper function tricks gcc 4.6 into avoiding saving + * a copy of "edesc->words[0]" on the stack for no obvious reason. + */ + +static inline void gxio_mpipe_equeue_put_at_aux(gxio_mpipe_equeue_t *equeue, + uint_reg_t ew[2], + unsigned long slot) +{ + unsigned long edma_slot = slot & equeue->mask_num_entries; + gxio_mpipe_edesc_t *edesc_p = &equeue->edescs[edma_slot]; + + /* + * ISSUE: Could set eDMA ring to be on generation 1 at start, which + * would avoid the negation here, perhaps allowing "__insn_bfins()". 
+ */ + ew[0] |= !((slot >> equeue->log2_num_entries) & 1); + + /* + * NOTE: We use "__gxio_mmio_write64()", plus the fact that the eDMA + * queue alignment restrictions ensure that these two words are on + * the same cacheline, to force proper ordering between the stores. + */ + __gxio_mmio_write64(&edesc_p->words[1], ew[1]); + __gxio_mmio_write64(&edesc_p->words[0], ew[0]); +} + +/* Post an edesc to a given slot in an equeue. + * + * This function copies the supplied edesc into entry "slot mod N" in + * the underlying ring, setting the "gen" bit to the appropriate value + * based on "(slot mod N*2)", where "N" is the size of the ring. Note + * that the higher bits of slot are unused, and thus, this function + * can handle "slots" as well as "completion slots". + * + * Normally this function is used to fill in slots reserved by + * gxio_mpipe_equeue_try_reserve(), gxio_mpipe_equeue_reserve(), + * gxio_mpipe_equeue_try_reserve_fast(), or + * gxio_mpipe_equeue_reserve_fast(). + * + * This function can also be used without "reserving" slots, if the + * application KNOWS that the ring can never overflow, for example, by + * pushing fewer buffers into the buffer stacks than there are total + * slots in the equeue, but this is NOT recommended. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param edesc The egress descriptor to be posted. + * @param slot An egress slot (only the low bits are actually used). + */ +static inline void gxio_mpipe_equeue_put_at(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_edesc_t edesc, + unsigned long slot) +{ + gxio_mpipe_equeue_put_at_aux(equeue, edesc.words, slot); +} + +/* Post an edesc to the next slot in an equeue. + * + * This is a convenience wrapper around + * gxio_mpipe_equeue_reserve_fast() and gxio_mpipe_equeue_put_at(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param edesc The egress descriptor to be posted. + * @return 0 on success, or a negative error code if no slot could + * be reserved.
+ */ +static inline int gxio_mpipe_equeue_put(gxio_mpipe_equeue_t *equeue, + gxio_mpipe_edesc_t edesc) +{ + int64_t slot = gxio_mpipe_equeue_reserve_fast(equeue, 1); + if (slot < 0) + return (int)slot; + + gxio_mpipe_equeue_put_at(equeue, edesc, slot); + + return 0; +} + +/* Ask the mPIPE hardware to egress outstanding packets immediately. + * + * This call is not necessary, but may slightly reduce overall latency. + * + * Technically, you should flush all gxio_mpipe_equeue_put_at() writes + * to memory before calling this function, to ensure the descriptors + * are visible in memory before the mPIPE hardware actually looks for + * them. But this should be very rare, and the only side effect would + * be increased latency, so it is up to the caller to decide whether + * or not to flush memory. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + */ +static inline void gxio_mpipe_equeue_flush(gxio_mpipe_equeue_t *equeue) +{ + /* Use "ring_idx = 0" and "count = 0" to "wake up" the eDMA ring. */ + MPIPE_EDMA_POST_REGION_VAL_t val = { {0} }; + /* Flush the write buffers. */ + __insn_flushwb(); + __gxio_mmio_write(equeue->dma_queue.post_region_addr, val.word); +} + +/* Determine if a given edesc has been completed. + * + * Note that this function requires a "completion slot", and thus may + * NOT be used with a "slot" from gxio_mpipe_equeue_reserve_fast() or + * gxio_mpipe_equeue_try_reserve_fast(). + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param completion_slot The completion slot used by the edesc. + * @param update If true, and the desc does not appear to have completed + * yet, then update any software cache of the hardware completion counter, + * and check again. This should normally be true. + * @return True iff the given edesc has been completed. 
+ */ +static inline int gxio_mpipe_equeue_is_complete(gxio_mpipe_equeue_t *equeue, + int64_t completion_slot, + int update) +{ + return __gxio_dma_queue_is_complete(&equeue->dma_queue, + completion_slot, update); +} + +/* Set the snf (store and forward) size for an equeue. + * + * The snf size for an equeue defaults to 1536, and encodes the size + * of the largest packet for which egress is guaranteed to avoid + * transmission underruns and/or corrupt checksums under heavy load. + * + * The snf size affects a global resource pool which cannot support, + * for example, all 24 equeues each requesting an snf size of 8K. + * + * To ensure that jumbo packets can be egressed properly, the snf size + * should be set to the size of the largest possible packet, which + * will usually be limited by the size of the app's largest buffer. + * + * This is a convenience wrapper around + * gxio_mpipe_config_edma_ring_blks(). + * + * This function should not be called after any egress has been done + * on the equeue. + * + * @param equeue An egress queue initialized via gxio_mpipe_equeue_init(). + * @param size The snf size, in bytes. + * @return Zero on success, negative error otherwise. + */ +static inline int gxio_mpipe_equeue_set_snf_size(gxio_mpipe_equeue_t *equeue, + size_t size) +{ + int blks = (size + 127) / 128; + return gxio_mpipe_config_edma_ring_blks(equeue->context, equeue->ering, + blks + 1, blks, 1); +} + +/***************************************************************** + * Link Management * + ******************************************************************/ + +/* + * + * Functions for manipulating and sensing the state and configuration + * of physical network links. + * + * @section gxio_mpipe_link_perm Link Permissions + * + * Opening a link (with gxio_mpipe_link_open()) requests a set of link + * permissions, which control what may be done with the link, and potentially + * what permissions may be granted to other processes. 
+ * + * Data permission allows the process to receive packets from the link by + * specifying the link's channel number in mPIPE packet distribution rules, + * and to send packets to the link by using the link's channel number as + * the target for an eDMA ring. + * + * Stats permission allows the process to retrieve link attributes (such as + * the speeds it is capable of running at, or whether it is currently up), and + * to read and write certain statistics-related registers in the link's MAC. + * + * Control permission allows the process to retrieve and modify link attributes + * (so that it may, for example, bring the link up and take it down), and + * read and write many registers in the link's MAC and PHY. + * + * Any permission may be requested as shared, which allows other processes + * to also request shared permission, or exclusive, which prevents other + * processes from requesting it. In keeping with GXIO's typical usage in + * an embedded environment, the defaults for all permissions are shared. + * + * Permissions are granted on a first-come, first-served basis, so if two + * applications request an exclusive permission on the same link, the one + * to run first will win. Note, however, that some system components, like + * the kernel Ethernet driver, may get an opportunity to open links before + * any applications run. + * + * @section gxio_mpipe_link_names Link Names + * + * Link names are of the form gbe<em>number</em> (for Gigabit Ethernet), + * xgbe<em>number</em> (for 10 Gigabit Ethernet), loop<em>number</em> (for + * internal mPIPE loopback), or ilk<em>number</em>/<em>channel</em> + * (for Interlaken links); for instance, gbe0, xgbe1, loop3, and + * ilk0/12 are all possible link names. 
The correspondence between + * the link name and an mPIPE instance number or mPIPE channel number is + * system-dependent; not all links will exist on all systems, and the set + * of numbers used for a particular link type may not start at zero and may + * not be contiguous. Use gxio_mpipe_link_enumerate() to retrieve the set of + * links which exist on a system, and always use gxio_mpipe_link_instance() + * to determine which mPIPE controls a particular link. + * + * Note that in some cases, links may share hardware, such as PHYs, or + * internal mPIPE buffers; in these cases, only one of the links may be + * opened at a time. This is especially common with xgbe and gbe ports, + * since each xgbe port uses 4 SERDES lanes, each of which may also be + * configured as one gbe port. + * + * @section gxio_mpipe_link_states Link States + * + * The mPIPE link management model revolves around three different states, + * which are maintained for each link: + * + * 1. The <em>current</em> link state: is the link up now, and if so, at + * what speed? + * + * 2. The <em>desired</em> link state: what do we want the link state to be? + * The system is always working to make this state the current state; + * thus, if the desired state is up, and the link is down, we'll be + * constantly trying to bring it up, automatically. + * + * 3. The <em>possible</em> link state: what speeds are valid for this + * particular link? Or, in other words, what are the capabilities of + * the link hardware? + * + * These link states are not, strictly speaking, related to application + * state; they may be manipulated at any time, whether or not the link + * is currently being used for data transfer. However, for convenience, + * gxio_mpipe_link_open() and gxio_mpipe_link_close() (or application exit) + * can affect the link state. These implicit link management operations + * may be modified or disabled by the use of link open flags.
+ * + * From an application, you can use gxio_mpipe_link_get_attr() + * and gxio_mpipe_link_set_attr() to manipulate the link states. + * gxio_mpipe_link_get_attr() with ::GXIO_MPIPE_LINK_POSSIBLE_STATE + * gets you the possible link state. gxio_mpipe_link_get_attr() with + * ::GXIO_MPIPE_LINK_CURRENT_STATE gets you the current link state. + * Finally, gxio_mpipe_link_set_attr() and gxio_mpipe_link_get_attr() + * with ::GXIO_MPIPE_LINK_DESIRED_STATE allow you to modify or retrieve + * the desired link state. + * + * If you want to manage a link from a part of your application which isn't + * involved in packet processing, you can use the ::GXIO_MPIPE_LINK_NO_DATA + * flag on a gxio_mpipe_link_open() call. This opens the link, but does + * not request data permission, so it does not conflict with any exclusive + * permissions which may be held by other processes. You can then use + * gxio_mpipe_link_get_attr() and gxio_mpipe_link_set_attr() on this link + * object to bring up or take down the link. + * + * Some links support link state bits which enable various loopback + * modes. ::GXIO_MPIPE_LINK_LOOP_MAC tests datapaths within the Tile + * Processor itself; ::GXIO_MPIPE_LINK_LOOP_PHY tests the datapath between + * the Tile Processor and the external physical layer interface chip; and + * ::GXIO_MPIPE_LINK_LOOP_EXT tests the entire network datapath with the + * aid of an external loopback connector. In addition to enabling hardware + * testing, such configuration can be useful for software testing, as well. + * + * When LOOP_MAC or LOOP_PHY is enabled, packets transmitted on a channel + * will be received by that channel, instead of being emitted on the + * physical link, and packets received on the physical link will be ignored. + * Other than that, all standard GXIO operations work as you might expect. + * Note that loopback operation requires that the link be brought up using + * one or more of the GXIO_MPIPE_LINK_SPEED_xxx link state bits.
+ * + * Those familiar with previous versions of the MDE on TILEPro hardware + * will notice significant similarities between the NetIO link management + * model and the mPIPE link management model. However, the NetIO model + * was developed in stages, and some of its features -- for instance, + * the default setting of certain flags -- were shaped by the need to be + * compatible with previous versions of NetIO. Since the features provided + * by the mPIPE hardware and the mPIPE GXIO library are significantly + * different than those provided by NetIO, in some cases, we have made + * different choices in the mPIPE link management API. Thus, please read + * this documentation carefully before assuming that mPIPE link management + * operations are exactly equivalent to their NetIO counterparts. + */ + +/* An object used to manage mPIPE link state and resources. */ +typedef struct { + /* The overall mPIPE context. */ + gxio_mpipe_context_t *context; + + /* The channel number used by this link. */ + uint8_t channel; + + /* The MAC index used by this link. */ + uint8_t mac; +} gxio_mpipe_link_t; + +/* Translate a link name to the instance number of the mPIPE shim which is + * connected to that link. This call does not verify whether the link is + * currently available, and does not reserve any link resources; + * gxio_mpipe_link_open() must be called to perform those functions. + * + * Typically applications will call this function to translate a link name + * to an mPIPE instance number; call gxio_mpipe_init(), passing it that + * instance number, to initialize the mPIPE shim; and then call + * gxio_mpipe_link_open(), passing it the same link name plus the mPIPE + * context, to configure the link. + * + * @param link_name Name of the link; see @ref gxio_mpipe_link_names. + * @return The mPIPE instance number which is associated with the named + * link, or a negative error code (::GXIO_ERR_NO_DEVICE) if the link does + * not exist. 
+ */ +extern int gxio_mpipe_link_instance(const char *link_name); + +/* Retrieve one of this system's legal link names, and its MAC address. + * + * @param index Link name index. If a system supports N legal link names, + * then indices between 0 and N - 1, inclusive, each correspond to one of + * those names. Thus, to retrieve all of a system's legal link names, + * call this function in a loop, starting with an index of zero, and + * incrementing it once per iteration until -1 is returned. + * @param link_name Pointer to the buffer which will receive the retrieved + * link name. The buffer should contain space for at least + * ::GXIO_MPIPE_LINK_NAME_LEN bytes; the returned name, including the + * terminating null byte, will be no longer than that. + * @param mac_addr Pointer to the buffer which will receive the retrieved + * MAC address. The buffer should contain space for at least 6 bytes. + * @return Zero if a link name was successfully retrieved; -1 if one was + * not. + */ +extern int gxio_mpipe_link_enumerate_mac(int index, char *link_name, + uint8_t *mac_addr); + +/* Open an mPIPE link. + * + * A link must be opened before it may be used to send or receive packets, + * and before its state may be examined or changed. Depending upon the + * link's intended use, one or more link permissions may be requested via + * the flags parameter; see @ref gxio_mpipe_link_perm. In addition, flags + * may request that the link's state be modified at open time. See @ref + * gxio_mpipe_link_states and @ref gxio_mpipe_link_open_flags for more detail. + * + * @param link A link state object, which will be initialized if this + * function completes successfully. + * @param context An initialized mPIPE context. + * @param link_name Name of the link. + * @param flags Zero or more @ref gxio_mpipe_link_open_flags, ORed together. + * @return 0 if the link was successfully opened, or a negative error code.
+ * + */ +extern int gxio_mpipe_link_open(gxio_mpipe_link_t *link, + gxio_mpipe_context_t *context, + const char *link_name, unsigned int flags); + +/* Close an mPIPE link. + * + * Closing a link makes it available for use by other processes. Once + * a link has been closed, packets may no longer be sent on or received + * from the link, and its state may not be examined or changed. + * + * @param link A link state object, which will no longer be initialized + * if this function completes successfully. + * @return 0 if the link was successfully closed, or a negative error code. + * + */ +extern int gxio_mpipe_link_close(gxio_mpipe_link_t *link); + +/* Return a link's channel number. + * + * @param link A properly initialized link state object. + * @return The channel number for the link. + */ +static inline int gxio_mpipe_link_channel(gxio_mpipe_link_t *link) +{ + return link->channel; +} + +/* Set a link attribute. + * + * @param link A properly initialized link state object. + * @param attr An attribute from the set of @ref gxio_mpipe_link_attrs. + * @param val New value of the attribute. + * @return 0 if the attribute was successfully set, or a negative error + * code. + */ +extern int gxio_mpipe_link_set_attr(gxio_mpipe_link_t *link, uint32_t attr, + int64_t val); + +/////////////////////////////////////////////////////////////////// +// Timestamp // +/////////////////////////////////////////////////////////////////// + +/* Get the timestamp of mPIPE when this routine is called. + * + * @param context An initialized mPIPE context. + * @param ts A timespec structure to store the current clock. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_get_timestamp(gxio_mpipe_context_t *context, + struct timespec *ts); + +/* Set the timestamp of mPIPE. + * + * @param context An initialized mPIPE context. + * @param ts A timespec structure to store the requested clock. 
+ * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_set_timestamp(gxio_mpipe_context_t *context, + const struct timespec *ts); + +/* Adjust the timestamp of mPIPE. + * + * @param context An initialized mPIPE context. + * @param delta A signed time offset to adjust, in nanoseconds. + * The absolute value of this parameter must be less than or + * equal to 1000000000. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_adjust_timestamp(gxio_mpipe_context_t *context, + int64_t delta); + +/** Adjust the mPIPE timestamp clock frequency. + * + * @param context An initialized mPIPE context. + * @param ppb A 32-bit signed PPB (Parts Per Billion) value to adjust. + * The absolute value of ppb must be less than or equal to 1000000000. + * Values less than about 30000 will generally cause a GXIO_ERR_INVAL + * return due to the granularity of the hardware that converts reference + * clock cycles into seconds and nanoseconds. + * @return If the call was successful, zero; otherwise, a negative error + * code. + */ +extern int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t* context, + int32_t ppb); + +#endif /* !_GXIO_MPIPE_H_ */ diff --git a/arch/tile/include/gxio/trio.h b/arch/tile/include/gxio/trio.h new file mode 100644 index 00000000000..df10a662cc2 --- /dev/null +++ b/arch/tile/include/gxio/trio.h @@ -0,0 +1,298 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. 
See the GNU General Public License for + * more details. + */ + +/* + * + * An API for allocating, configuring, and manipulating TRIO hardware + * resources + */ + +/* + * + * The TILE-Gx TRIO shim provides connections to external devices via + * PCIe or other transaction IO standards. The gxio_trio_ API, + * declared in <gxio/trio.h>, allows applications to allocate and + * configure TRIO IO resources like DMA command rings, memory map + * windows, and device interrupts. The following sections introduce + * the various components of the API. We strongly recommend reading + * the TRIO section of the IO Device Guide (UG404) before working with + * this API. + * + * @section trio__ingress TRIO Ingress Hardware Resources + * + * The TRIO ingress hardware is responsible for examining incoming + * PCIe or StreamIO packets and choosing a processing mechanism based + * on the packets' bus address. The gxio_trio_ API can be used to + * configure different handlers for different ranges of bus address + * space. The user can configure "mapped memory" and "scatter queue" + * regions to match incoming packets within 4kB-aligned ranges of bus + * addresses. Each range specifies a different set of mapping + * parameters to be applied when handling the ingress packet. The + * following sections describe how to work with MapMem and scatter + * queue regions. + * + * @subsection trio__mapmem TRIO MapMem Regions + * + * TRIO mapped memory (or MapMem) regions allow the user to map + * incoming read and write requests directly to the application's + * memory space. MapMem regions are allocated via + * gxio_trio_alloc_memory_maps(). Given an integer MapMem number, + * applications can use gxio_trio_init_memory_map() to specify the + * range of bus addresses that will match the region and the range of + * virtual addresses to which those packets will be applied. 
+ * + * As with many other gxio APIs, the programmer must be sure to + * register memory pages that will be used with MapMem regions. Pages + * can be registered with TRIO by allocating an ASID (address space + * identifier) and then using gxio_trio_register_page() to register up to + * 16 pages with the hardware. The initialization functions for + * resources that require registered memory (MapMem, scatter queues, + * push DMA, and pull DMA) then take an 'asid' parameter in order to + * configure which set of registered pages is used by each resource. + * + * @subsection trio__scatter_queue TRIO Scatter Queues + * + * The TRIO shim's scatter queue regions allow users to dynamically + * map buffers from a large address space into a small range of bus + * addresses. This is particularly helpful for PCIe endpoint devices, + * where the host generally limits the size of BARs to tens of + * megabytes. + * + * Each scatter queue consists of a memory map region, a queue of + * tile-side buffer VAs to be mapped to that region, and a bus-mapped + * "doorbell" register that the remote endpoint can write to trigger a + * dequeue of the current buffer VA, thus swapping in a new buffer. + * The VAs pushed onto a scatter queue must be 4kB aligned, so + * applications may need to use higher-level protocols to inform + * remote entities that they should apply some additional, sub-4kB + * offset when reading or writing the scatter queue region. For more + * information, see the IO Device Guide (UG404). + * + * @section trio__egress TRIO Egress Hardware Resources + * + * The TRIO shim supports two mechanisms for egress packet generation: + * programmed IO (PIO) and push/pull DMA. PIO allows applications to + * create MMIO mappings for PCIe or StreamIO address space, such that + * the application can generate word-sized read or write transactions + * by issuing load or store instructions. 
Push and pull DMA are tuned + * for larger transactions; they use specialized hardware engines to + * transfer large blocks of data at line rate. + * + * @subsection trio__pio TRIO Programmed IO + * + * Programmed IO allows applications to create MMIO mappings for PCIe + * or StreamIO address space. The hardware PIO regions support access + * to PCIe configuration, IO, and memory space, but the gxio_trio API + * only supports memory space accesses. PIO regions are allocated + * with gxio_trio_alloc_pio_regions() and initialized via + * gxio_trio_init_pio_region(). Once a region is bound to a range of + * bus address via the initialization function, the application can + * use gxio_trio_map_pio_region() to create MMIO mappings from its VA + * space onto the range of bus addresses supported by the PIO region. + * + * @subsection trio_dma TRIO Push and Pull DMA + * + * The TRIO push and pull DMA engines allow users to copy blocks of + * data between application memory and the bus. Push DMA generates + * write packets that copy from application memory to the bus and pull + * DMA generates read packets that copy from the bus into application + * memory. The DMA engines are managed via an API that is very + * similar to the mPIPE eDMA interface. For a detailed explanation of + * the eDMA queue API, see @ref gxio_mpipe_wrappers. + * + * Push and pull DMA queues are allocated via + * gxio_trio_alloc_push_dma_ring() / gxio_trio_alloc_pull_dma_ring(). + * Once allocated, users generally use a ::gxio_trio_dma_queue_t + * object to manage the queue, providing easy wrappers for reserving + * command slots in the DMA command ring, filling those slots, and + * waiting for commands to complete. DMA queues can be initialized + * via gxio_trio_init_push_dma_queue() or + * gxio_trio_init_pull_dma_queue(). + * + * See @ref trio/push_dma/app.c for an example of how to use push DMA. 
+ * + * @section trio_shortcomings Plans for Future API Revisions + * + * The simulation framework is incomplete. Future features include: + * + * - Support for reset and deallocation of resources. + * + * - Support for pull DMA. + * + * - Support for interrupt regions and user-space interrupt delivery. + * + * - Support for getting BAR mappings and reserving regions of BAR + * address space. + */ +#ifndef _GXIO_TRIO_H_ +#define _GXIO_TRIO_H_ + +#include <linux/types.h> + +#include <gxio/common.h> +#include <gxio/dma_queue.h> + +#include <arch/trio_constants.h> +#include <arch/trio.h> +#include <arch/trio_pcie_intfc.h> +#include <arch/trio_pcie_rc.h> +#include <arch/trio_shm.h> +#include <hv/drv_trio_intf.h> +#include <hv/iorpc.h> + +/* A context object used to manage TRIO hardware resources. */ +typedef struct { + + /* File descriptor for calling up to Linux (and thus the HV). */ + int fd; + + /* The VA at which the MAC MMIO registers are mapped. */ + char *mmio_base_mac; + + /* The VA at which the PIO config space is mapped for each PCIe MAC. + Gx36 has max 3 PCIe MACs per TRIO shim. */ + char *mmio_base_pio_cfg[TILEGX_TRIO_PCIES]; + +#ifdef USE_SHARED_PCIE_CONFIG_REGION + /* Index of the shared PIO region for PCI config access. */ + int pio_cfg_index; +#else + /* Index of the PIO region for PCI config access per MAC. */ + int pio_cfg_index[TILEGX_TRIO_PCIES]; +#endif + + /* The VA at which the push DMA MMIO registers are mapped. */ + char *mmio_push_dma[TRIO_NUM_PUSH_DMA_RINGS]; + + /* The VA at which the pull DMA MMIO registers are mapped. + NOTE(review): this array is sized with TRIO_NUM_PUSH_DMA_RINGS, + not a pull-ring count; confirm that the push and pull ring + counts are equal or that a pull-specific constant was intended. */ + char *mmio_pull_dma[TRIO_NUM_PUSH_DMA_RINGS]; + + /* Application space ID. */ + unsigned int asid; + +} gxio_trio_context_t; + +/* Command descriptor for push or pull DMA. */ +typedef TRIO_DMA_DESC_t gxio_trio_dma_desc_t; + +/* A convenient, thread-safe interface to an eDMA ring. */ +typedef struct { + + /* State object for tracking head and tail pointers. */ + __gxio_dma_queue_t dma_queue; + + /* The ring entries.
*/ + gxio_trio_dma_desc_t *dma_descs; + + /* The number of entries minus one. */ + unsigned long mask_num_entries; + + /* The log2() of the number of entries. */ + unsigned int log2_num_entries; + +} gxio_trio_dma_queue_t; + +/* Initialize a TRIO context. + * + * This function allocates a TRIO "service domain" and maps the MMIO + * registers into the caller's VA space. + * + * @param context Context object to be initialized. + * @param trio_index Which TRIO shim; Gx36 must pass 0. + */ +extern int gxio_trio_init(gxio_trio_context_t *context, + unsigned int trio_index); + +/* This indicates that an ASID hasn't been allocated. The value is + * parenthesized so the negative constant expands safely inside any + * larger expression. */ +#define GXIO_ASID_NULL (-1) + +/* Ordering modes for map memory regions and scatter queue regions. */ +typedef enum gxio_trio_order_mode_e { + /* Writes are not ordered. Reads always wait for previous writes. */ + GXIO_TRIO_ORDER_MODE_UNORDERED = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_UNORDERED, + /* Both writes and reads wait for previous transactions to complete. */ + GXIO_TRIO_ORDER_MODE_STRICT = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_STRICT, + /* Writes are ordered unless the incoming packet has the + relaxed-ordering attributes set. */ + GXIO_TRIO_ORDER_MODE_OBEY_PACKET = + TRIO_MAP_MEM_SETUP__ORDER_MODE_VAL_REL_ORD +} gxio_trio_order_mode_t; + +/* Initialize a memory mapping region. + * + * @param context An initialized TRIO context. + * @param map A Memory map region allocated by gxio_trio_alloc_memory_maps(). + * @param target_mem VA of backing memory, should be registered via + * gxio_trio_register_page() and aligned to 4kB. + * @param target_size Length of the memory mapping, must be a multiple + * of 4kB. + * @param asid ASID to be used for Tile-side address translation. + * @param mac MAC number. + * @param bus_address Bus address at which the mapping starts. + * @param order_mode Memory ordering mode for this mapping.
+ * @return Zero on success, else ::GXIO_TRIO_ERR_BAD_MEMORY_MAP, + * GXIO_TRIO_ERR_BAD_ASID, or ::GXIO_TRIO_ERR_BAD_BUS_RANGE. + */ +extern int gxio_trio_init_memory_map(gxio_trio_context_t *context, + unsigned int map, void *target_mem, + size_t target_size, unsigned int asid, + unsigned int mac, uint64_t bus_address, + gxio_trio_order_mode_t order_mode); + +/* Flags that can be passed to resource allocation functions. */ +enum gxio_trio_alloc_flags_e { + GXIO_TRIO_ALLOC_FIXED = HV_TRIO_ALLOC_FIXED, +}; + +/* Flags that can be passed to memory registration functions. */ +enum gxio_trio_mem_flags_e { + /* Do not fill L3 when writing, and invalidate lines upon egress. */ + GXIO_TRIO_MEM_FLAG_NT_HINT = IORPC_MEM_BUFFER_FLAG_NT_HINT, + + /* L3 cache fills should only populate IO cache ways. */ + GXIO_TRIO_MEM_FLAG_IO_PIN = IORPC_MEM_BUFFER_FLAG_IO_PIN, +}; + +/* Flag indicating a request generator uses a special traffic + class. */ +#define GXIO_TRIO_FLAG_TRAFFIC_CLASS(N) HV_TRIO_FLAG_TC(N) + +/* Flag indicating a request generator uses a virtual function + number. */ +#define GXIO_TRIO_FLAG_VFUNC(N) HV_TRIO_FLAG_VFUNC(N) + +/***************************************************************** + * Memory Registration * + ******************************************************************/ + +/* Allocate Application Space Identifiers (ASIDs). Each ASID can + * register up to 16 page translations. ASIDs are used by memory map + * regions, scatter queues, and DMA queues to translate application + * VAs into memory system PAs. + * + * @param context An initialized TRIO context. + * @param count Number of ASIDs required. + * @param first Index of first ASID if ::GXIO_TRIO_ALLOC_FIXED flag + * is set, otherwise ignored. + * @param flags Flag bits, including bits from ::gxio_trio_alloc_flags_e. + * @return Index of first ASID, or ::GXIO_TRIO_ERR_NO_ASID if allocation + * failed. 
+ */ +extern int gxio_trio_alloc_asids(gxio_trio_context_t *context, + unsigned int count, unsigned int first, + unsigned int flags); + +#endif /* ! _GXIO_TRIO_H_ */ diff --git a/arch/tile/include/gxio/uart.h b/arch/tile/include/gxio/uart.h new file mode 100644 index 00000000000..438ee7e46c7 --- /dev/null +++ b/arch/tile/include/gxio/uart.h @@ -0,0 +1,105 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_UART_H_ +#define _GXIO_UART_H_ + +#include "common.h" + +#include <hv/drv_uart_intf.h> +#include <hv/iorpc.h> + +/* + * + * An API for manipulating UART interface. + */ + +/* + * + * The Rshim allows access to the processor's UART interface. + */ + +/* A context object used to manage UART resources. */ +typedef struct { + + /* File descriptor for calling up to the hypervisor. */ + int fd; + + /* The VA at which our MMIO registers are mapped. */ + char *mmio_base; + +} gxio_uart_context_t; + +/* Request UART interrupts. + * + * Request that interrupts be delivered to a tile when the UART's + * Receive FIFO is written, or the Write FIFO is read. + * + * @param context Pointer to a properly initialized gxio_uart_context_t. + * @param bind_cpu_x X coordinate of CPU to which interrupt will be delivered. + * @param bind_cpu_y Y coordinate of CPU to which interrupt will be delivered. + * @param bind_interrupt IPI interrupt number. + * @param bind_event Sub-interrupt event bit number; a negative value can + * disable the interrupt. 
+ * @return Zero if all of the requested UART events were successfully + * configured to interrupt. + */ +extern int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, + int bind_cpu_x, + int bind_cpu_y, + int bind_interrupt, int bind_event); + +/* Initialize a UART context. + * + * A properly initialized context must be obtained before any of the other + * gxio_uart routines may be used. + * + * @param context Pointer to a gxio_uart_context_t, which will be initialized + * by this routine, if it succeeds. + * @param uart_index Index of the UART to use. + * @return Zero if the context was successfully initialized, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_uart_init(gxio_uart_context_t *context, int uart_index); + +/* Destroy a UART context. + * + * Once destroyed, a context may not be used with any gxio_uart routines + * other than gxio_uart_init(). After this routine returns, no further + * interrupts requested on this context will be delivered. The state and + * configuration of the pins which had been attached to this context are + * unchanged by this operation. + * + * @param context Pointer to a gxio_uart_context_t. + * @return Zero if the context was successfully destroyed, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_uart_destroy(gxio_uart_context_t *context); + +/* Write UART register. + * @param context Pointer to a gxio_uart_context_t. + * @param offset UART register offset. + * @param word Data to be written to the UART register. + */ +extern void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset, + uint64_t word); + +/* Read UART register. + * @param context Pointer to a gxio_uart_context_t. + * @param offset UART register offset. + * @return Data read from UART register.
+ */ +extern uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset); + +#endif /* _GXIO_UART_H_ */ diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h new file mode 100644 index 00000000000..93c9636d2dd --- /dev/null +++ b/arch/tile/include/gxio/usb_host.h @@ -0,0 +1,87 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _GXIO_USB_H_ +#define _GXIO_USB_H_ + +#include <gxio/common.h> + +#include <hv/drv_usb_host_intf.h> +#include <hv/iorpc.h> + +/* + * + * An API for manipulating the USB host interface. + */ + +/* + * + * The USB shim allows access to the processor's Universal Serial Bus + * connections. + */ + +/* A context object used to manage USB hardware resources. */ +typedef struct { + + /* File descriptor for calling up to the hypervisor. */ + int fd; + + /* The VA at which our MMIO registers are mapped. */ + char *mmio_base; +} gxio_usb_host_context_t; + +/* Initialize a USB context. + * + * A properly initialized context must be obtained before any of the other + * gxio_usb_host routines may be used. + * + * @param context Pointer to a gxio_usb_host_context_t, which will be + * initialized by this routine, if it succeeds. + * @param usb_index Index of the USB shim to use. + * @param is_ehci Nonzero to use the EHCI interface; zero to use the OHCI + * interface. + * @return Zero if the context was successfully initialized, else a + * GXIO_ERR_xxx error code. 
+ */ +extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index, + int is_ehci); + +/* Destroy a USB context. + * + * Once destroyed, a context may not be used with any gxio_usb_host routines + * other than gxio_usb_host_init(). After this routine returns, no further + * interrupts or signals requested on this context will be delivered. The + * state and configuration of the pins which had been attached to this + * context are unchanged by this operation. + * + * @param context Pointer to a gxio_usb_host_context_t. + * @return Zero if the context was successfully destroyed, else a + * GXIO_ERR_xxx error code. + */ +extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context); + +/* Retrieve the address of the shim's MMIO registers. + * + * @param context Pointer to a properly initialized gxio_usb_host_context_t. + * @return The address of the shim's MMIO registers. + */ +extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context); + +/* Retrieve the length of the shim's MMIO registers. + * + * @param context Pointer to a properly initialized gxio_usb_host_context_t. + * @return The length of the shim's MMIO registers. + */ +extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context); + +#endif /* _GXIO_USB_H_ */ diff --git a/arch/tile/include/hv/drv_mpipe_intf.h b/arch/tile/include/hv/drv_mpipe_intf.h new file mode 100644 index 00000000000..c97e416dd96 --- /dev/null +++ b/arch/tile/include/hv/drv_mpipe_intf.h @@ -0,0 +1,605 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT.
See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the mpipe driver. + */ + +#ifndef _SYS_HV_DRV_MPIPE_INTF_H +#define _SYS_HV_DRV_MPIPE_INTF_H + +#include <arch/mpipe.h> +#include <arch/mpipe_constants.h> + + +/** Number of mPIPE instances supported */ +#define HV_MPIPE_INSTANCE_MAX (2) + +/** Number of buffer stacks (32). */ +#define HV_MPIPE_NUM_BUFFER_STACKS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH) + +/** Number of NotifRings (256). */ +#define HV_MPIPE_NUM_NOTIF_RINGS (MPIPE_NUM_NOTIF_RINGS) + +/** Number of NotifGroups (32). */ +#define HV_MPIPE_NUM_NOTIF_GROUPS (MPIPE_NUM_NOTIF_GROUPS) + +/** Number of buckets (4160). */ +#define HV_MPIPE_NUM_BUCKETS (MPIPE_NUM_BUCKETS) + +/** Number of "lo" buckets (4096). */ +#define HV_MPIPE_NUM_LO_BUCKETS 4096 + +/** Number of "hi" buckets (64). */ +#define HV_MPIPE_NUM_HI_BUCKETS \ + (HV_MPIPE_NUM_BUCKETS - HV_MPIPE_NUM_LO_BUCKETS) + +/** Number of edma rings (24). */ +#define HV_MPIPE_NUM_EDMA_RINGS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH) + + + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_MPIPE_ALLOC_FIXED 0x01 + +/** Offset for the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_CFG << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_SIZE (64 * 1024) + +/** Offset for the fast register MMIO region. */ +#define HV_MPIPE_FAST_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_IDMA << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the fast register MMIO region (IDMA, EDMA, buffer stack).
*/ +#define HV_MPIPE_FAST_MMIO_SIZE \ + ((MPIPE_MMIO_ADDR__REGION_VAL_BSM + 1 - MPIPE_MMIO_ADDR__REGION_VAL_IDMA) \ + << MPIPE_MMIO_ADDR__REGION_SHIFT) + + +/* + * Each type of resource allocation comes in quantized chunks, where + * XXX_BITS is the number of chunks, and XXX_RES_PER_BIT is the number + * of resources in each chunk. + */ + +/** Number of buffer stack chunks available (32). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH + +/** Granularity of buffer stack allocation (1). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_RES_PER_BIT \ + (HV_MPIPE_NUM_BUFFER_STACKS / HV_MPIPE_ALLOC_BUFFER_STACKS_BITS) + +/** Number of NotifRing chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__NOTIF_RING_MASK_WIDTH + +/** Granularity of NotifRing allocation (8). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_RINGS / HV_MPIPE_ALLOC_NOTIF_RINGS_BITS) + +/** Number of NotifGroup chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS \ + HV_MPIPE_NUM_NOTIF_GROUPS + +/** Granularity of NotifGroup allocation (1). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_GROUPS / HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS) + +/** Number of lo bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_LO_WIDTH + +/** Granularity of lo bucket allocation (256). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_LO_BUCKETS / HV_MPIPE_ALLOC_LO_BUCKETS_BITS) + +/** Number of hi bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_HI_WIDTH + +/** Granularity of hi bucket allocation (4). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_HI_BUCKETS / HV_MPIPE_ALLOC_HI_BUCKETS_BITS) + +/** Number of eDMA ring chunks available (24). 
*/ +#define HV_MPIPE_ALLOC_EDMA_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH + +/** Granularity of eDMA ring allocation (1). */ +#define HV_MPIPE_ALLOC_EDMA_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_EDMA_RINGS / HV_MPIPE_ALLOC_EDMA_RINGS_BITS) + + + + +/** Bit vector encoding which NotifRings are in a NotifGroup. */ +typedef struct +{ + /** The actual bits. */ + uint64_t ring_mask[4]; + +} gxio_mpipe_notif_group_bits_t; + + +/** Another name for MPIPE_LBL_INIT_DAT_BSTS_TBL_t. */ +typedef MPIPE_LBL_INIT_DAT_BSTS_TBL_t gxio_mpipe_bucket_info_t; + + + +/** Eight buffer stack ids. */ +typedef struct +{ + /** The stacks. */ + uint8_t stacks[8]; + +} gxio_mpipe_rules_stacks_t; + + +/** A destination mac address. */ +typedef struct +{ + /** The octets. */ + uint8_t octets[6]; + +} gxio_mpipe_rules_dmac_t; + + +/** A vlan. */ +typedef uint16_t gxio_mpipe_rules_vlan_t; + + + +/** Maximum number of characters in a link name. */ +#define GXIO_MPIPE_LINK_NAME_LEN 32 + + +/** Structure holding a link name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_LINK_NAME_LEN]; +} +_gxio_mpipe_link_name_t; + +/** Maximum number of characters in a symbol name. */ +#define GXIO_MPIPE_SYMBOL_NAME_LEN 128 + + +/** Structure holding a symbol name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_SYMBOL_NAME_LEN]; +} +_gxio_mpipe_symbol_name_t; + + +/** Structure holding a MAC address. */ +typedef struct +{ + /** The address. */ + uint8_t mac[6]; +} +_gxio_mpipe_link_mac_t; + + + +/** Request shared data permission -- that is, the ability to send and + * receive packets -- on the specified link. 
Other processes may also + * request shared data permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_DATA 0x00000001UL + +/** Do not request data permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_NO_DATA 0x00000002UL + +/** Request exclusive data permission -- that is, the ability to send and + * receive packets -- on the specified link. No other processes may + * request data permission on this link, and if any process already has + * data permission on it, this open will fail. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_DATA 0x00000004UL + +/** Request shared stats permission -- that is, the ability to read and write + * registers which contain link statistics, and to get link attributes -- + * on the specified link. Other processes may also request shared stats + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_STATS 0x00000008UL + +/** Do not request stats permission on the specified link.
+ * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_NO_STATS 0x00000010UL + +/** Request exclusive stats permission -- that is, the ability to read and + * write registers which contain link statistics, and to get link + * attributes -- on the specified link. No other processes may request + * stats permission on this link, and if any process already + * has stats permission on it, this open will fail. + * + * Requesting exclusive stats permission is normally a very bad idea, since + * it prevents programs like mpipe-stat from providing information on this + * link. Applications should only do this if they use MAC statistics + * registers, and cannot tolerate any of the clear-on-read registers being + * reset by other statistics programs. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_STATS 0x00000020UL + +/** Request shared control permission -- that is, the ability to modify link + * attributes, and read and write MAC and MDIO registers -- on the + * specified link. Other processes may also request shared control + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_CTL 0x00000040UL + +/** Do not request control permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open() + * call.
If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_NO_CTL 0x00000080UL + +/** Request exclusive control permission -- that is, the ability to modify + * link attributes, and read and write MAC and MDIO registers -- on the + * specified link. No other processes may request control permission on + * this link, and if any process already has control permission on it, + * this open will fail. + * + * Requesting exclusive control permission is not always a good idea, since + * it prevents programs like mpipe-link from configuring the link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_CTL 0x00000100UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; do not + * change the desired state of the link when it is closed or the process + * exits. No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_UP 0x00000200UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; when the + * link is closed or this process exits, if no other process has the link + * open, set the desired state of the link to down. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specified in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed.
+ */ +#define GXIO_MPIPE_LINK_AUTO_UPDOWN 0x00000400UL + +/** Do not change the desired state of the link as part of the open + * operation; when the link is closed or this process exits, if no other + * process has the link open, set the desired state of the link to down. + * No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specified in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_DOWN 0x00000800UL + +/** Do not change the desired state of the link as part of the open + * operation; do not change the desired state of the link when it is + * closed or the process exits. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specified in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_NONE 0x00001000UL + +/** Request that this open call not complete until the network link is up. + * The process will wait as long as necessary for this to happen; + * applications which wish to abandon waiting for the link after a + * specific time period should not specify this flag when opening a link, + * but should instead call gxio_mpipe_link_wait() afterward. The link + * must be opened with stats permission. Note that this flag by itself + * does not change the desired link state; if other open flags or previous + * link state changes have not requested a desired state of up, the open + * call will never complete. This flag is not available to kernel + * clients. + */ +#define GXIO_MPIPE_LINK_WAIT 0x00002000UL + + +/* + * Note: link attributes must fit in 24 bits, since we use the top 8 bits + * of the IORPC offset word for the channel number. + */ + +/** Determine whether jumbo frames may be received.
If this attribute's + * value is nonzero, the MAC will accept frames of up to 10240 bytes. + * If the value is zero, the MAC will only accept frames of up to 1544 + * bytes. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_JUMBO 0x010000 + +/** Determine whether to send pause frames on this link if the mPIPE packet + * FIFO is nearly full. If the value is zero, pause frames are not sent. + * If the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. + * + * Bear in mind that in almost all circumstances, the mPIPE packet FIFO + * will never fill up, since mPIPE will empty it as fast as or faster than + * the incoming data rate, by either delivering or dropping packets. The + * only situation in which this is not true is if the memory and cache + * subsystem is extremely heavily loaded, and mPIPE cannot perform DMA of + * packet data to memory in a timely fashion. In particular, pause frames + * will <em>not</em> be sent if packets cannot be delivered because + * NotifRings are full, buckets are full, or buffers are not available in + * a buffer stack. */ +#define GXIO_MPIPE_LINK_SEND_PAUSE 0x020000 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, mPIPE shim will suspend output on the link's + * channel when a pause frame is received. If the value is zero, pause + * frames will be ignored. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_PAUSE 0x030000 + +/** Interface MAC address. The value is a 6-byte MAC address, in the least + * significant 48 bits of the value; in other words, an address which would + * be printed as '12:34:56:78:90:AB' in IEEE 802 canonical format would + * be returned as 0x1234567890ab. + * + * Depending upon the overall system design, a MAC address may or may not + * be available for each interface. 
Note that the interface's MAC address + * does not limit the packets received on its channel, although the + * classifier's rules could be configured to do that. Similarly, the MAC + * address is not used when transmitting packets, although applications + * could certainly decide to use the assigned address as a source MAC + * address when doing so. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. + */ +#define GXIO_MPIPE_LINK_MAC 0x040000 + +/** Determine whether to discard egress packets on link down. If this value + * is nonzero, packets sent on this link while the link is down will be + * discarded. If this value is zero, no packets will be sent on this link + * while it is down. The default value is one. */ +#define GXIO_MPIPE_LINK_DISCARD_IF_DOWN 0x050000 + +/** Possible link state. The value is a combination of link state flags, + * ORed together, that indicate link modes which are actually supported by + * the hardware. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. */ +#define GXIO_MPIPE_LINK_POSSIBLE_STATE 0x060000 + +/** Current link state. The value is a combination of link state flags, + * ORed together, that indicate the current state of the hardware. If the + * link is down, the value ANDed with ::GXIO_MPIPE_LINK_SPEED_MASK will be zero; + * if the link is up, the value ANDed with ::GXIO_MPIPE_LINK_SPEED_MASK will + * result in exactly one of the speed values, indicating the current speed. + * This attribute may only be retrieved with gxio_mpipe_link_get_attr(); it + * may not be modified. */ +#define GXIO_MPIPE_LINK_CURRENT_STATE 0x070000 + +/** Desired link state. The value is a combination of flags, which specify + * the desired state for the link. With gxio_mpipe_link_set_attr(), this + * will, in the background, attempt to bring up the link using whichever of + * the requested flags are reasonable, or take down the link if the flags + * are zero. 
The actual link up or down operation may happen after this + * call completes. If the link state changes in the future, the system + * will continue to try to get back to the desired link state; for + * instance, if the link is brought up successfully, and then the network + * cable is disconnected, the link will go down. However, the desired + * state of the link is still up, so if the cable is reconnected, the link + * will be brought up again. + * + * With gxio_mpipe_link_get_attr(), this will indicate the desired state + * for the link, as set with a previous gxio_mpipe_link_set_attr() call, + * or implicitly by a gxio_mpipe_link_open() or link close operation. + * This may not reflect the current state of the link; to get that, use + * ::GXIO_MPIPE_LINK_CURRENT_STATE. + */ +#define GXIO_MPIPE_LINK_DESIRED_STATE 0x080000 + + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define GXIO_MPIPE_LINK_10M 0x0000000000000001UL + +/** Link can run, should run, or is running at 100 Mbps. */ +#define GXIO_MPIPE_LINK_100M 0x0000000000000002UL + +/** Link can run, should run, or is running at 1 Gbps. */ +#define GXIO_MPIPE_LINK_1G 0x0000000000000004UL + +/** Link can run, should run, or is running at 10 Gbps. */ +#define GXIO_MPIPE_LINK_10G 0x0000000000000008UL + +/** Link can run, should run, or is running at 20 Gbps. */ +#define GXIO_MPIPE_LINK_20G 0x0000000000000010UL + +/** Link can run, should run, or is running at 25 Gbps. */ +#define GXIO_MPIPE_LINK_25G 0x0000000000000020UL + +/** Link can run, should run, or is running at 50 Gbps. */ +#define GXIO_MPIPE_LINK_50G 0x0000000000000040UL + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define GXIO_MPIPE_LINK_ANYSPEED 0x0000000000000800UL + +/** All legal link speeds. 
This value is provided for use in extracting + * the speed-related subset of the link state flags; it is not intended + * to be set directly as a value for one of the GXIO_MPIPE_LINK_xxx_STATE + * attributes. A link is up or is requested to be up if its current or + * desired state, respectively, ANDED with this value, is nonzero. */ +#define GXIO_MPIPE_LINK_SPEED_MASK 0x0000000000000FFFUL + +/** Link can run, should run, or is running in MAC loopback mode. This + * loops transmitted packets back to the receiver, inside the Tile + * Processor. */ +#define GXIO_MPIPE_LINK_LOOP_MAC 0x0000000000001000UL + +/** Link can run, should run, or is running in PHY loopback mode. This + * loops transmitted packets back to the receiver, inside the external + * PHY chip. */ +#define GXIO_MPIPE_LINK_LOOP_PHY 0x0000000000002000UL + +/** Link can run, should run, or is running in external loopback mode. + * This requires that an external loopback plug be installed on the + * Ethernet port. Note that only some links require that this be + * configured via the gxio_mpipe_link routines; other links can do + * external loopback with the plug and no special configuration. */ +#define GXIO_MPIPE_LINK_LOOP_EXT 0x0000000000004000UL + +/** All legal loopback types. */ +#define GXIO_MPIPE_LINK_LOOP_MASK 0x000000000000F000UL + +/** Link can run, should run, or is running in full-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_FDX 0x0000000000010000UL + +/** Link can run, should run, or is running in half-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_HDX 0x0000000000020000UL + + +/** An individual rule. */ +typedef struct +{ + /** The total size. */ + uint16_t size; + + /** The priority. 
*/ + int16_t priority; + + /** The "headroom" in each buffer. */ + uint8_t headroom; + + /** The "tailroom" in each buffer. */ + uint8_t tailroom; + + /** The "capacity" of the largest buffer. */ + uint16_t capacity; + + /** The mask for converting a flow hash into a bucket. */ + uint16_t bucket_mask; + + /** The offset for converting a flow hash into a bucket. */ + uint16_t bucket_first; + + /** The buffer stack ids. */ + gxio_mpipe_rules_stacks_t stacks; + + /** The actual channels. */ + uint32_t channel_bits; + + /** The number of dmacs. */ + uint16_t num_dmacs; + + /** The number of vlans. */ + uint16_t num_vlans; + + /** The actual dmacs and vlans. */ + uint8_t dmacs_and_vlans[]; + +} gxio_mpipe_rules_rule_t; + + +/** A list of classifier rules. */ +typedef struct +{ + /** The offset to the end of the current rule. */ + uint16_t tail; + + /** The offset to the start of the current rule. */ + uint16_t head; + + /** The actual rules. */ + uint8_t rules[4096 - 4]; + +} gxio_mpipe_rules_list_t; + + + + +/** mPIPE statistics structure. These counters include all relevant + * events occurring on all links within the mPIPE shim. */ +typedef struct +{ + /** Number of ingress packets dropped for any reason. */ + uint64_t ingress_drops; + /** Number of ingress packets dropped because a buffer stack was empty. */ + uint64_t ingress_drops_no_buf; + /** Number of ingress packets dropped or truncated due to lack of space in + * the iPkt buffer. */ + uint64_t ingress_drops_ipkt; + /** Number of ingress packets dropped by the classifier or load balancer */ + uint64_t ingress_drops_cls_lb; + /** Total number of ingress packets. */ + uint64_t ingress_packets; + /** Total number of egress packets. */ + uint64_t egress_packets; + /** Total number of ingress bytes. */ + uint64_t ingress_bytes; + /** Total number of egress bytes. 
*/ + uint64_t egress_bytes; +} +gxio_mpipe_stats_t; + + +#endif /* _SYS_HV_DRV_MPIPE_INTF_H */ diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 00000000000..6395faa6d9e --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/include/hv/drv_trio_intf.h b/arch/tile/include/hv/drv_trio_intf.h new file mode 100644 index 00000000000..237e04dee66 --- /dev/null +++ b/arch/tile/include/hv/drv_trio_intf.h @@ -0,0 +1,199 @@ +/* + * Copyright 2012 Tilera Corporation. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the trio driver. + */ + +#ifndef _SYS_HV_DRV_TRIO_INTF_H +#define _SYS_HV_DRV_TRIO_INTF_H + +#include <arch/trio.h> + +/** The vendor ID for all Tilera processors. */ +#define TILERA_VENDOR_ID 0x1a41 + +/** The device ID for the Gx36 processor. */ +#define TILERA_GX36_DEV_ID 0x0200 + +/** Device ID for our internal bridge when running as RC. */ +#define TILERA_GX36_RC_DEV_ID 0x2000 + +/** Maximum number of TRIO interfaces. */ +#define TILEGX_NUM_TRIO 2 + +/** Gx36 has max 3 PCIe MACs per TRIO interface. */ +#define TILEGX_TRIO_PCIES 3 + +/** Specify port properties for a PCIe MAC. */ +struct pcie_port_property +{ + /** If true, the link can be configured in PCIe root complex mode. */ + uint8_t allow_rc: 1; + + /** If true, the link can be configured in PCIe endpoint mode. */ + uint8_t allow_ep: 1; + + /** If true, the link can be configured in StreamIO mode. */ + uint8_t allow_sio: 1; + + /** If true, the link is allowed to support 1-lane operation. Software + * will not consider it an error if the link comes up as a x1 link. */ + uint8_t allow_x1: 1; + + /** If true, the link is allowed to support 2-lane operation. Software + * will not consider it an error if the link comes up as a x2 link. */ + uint8_t allow_x2: 1; + + /** If true, the link is allowed to support 4-lane operation. Software + * will not consider it an error if the link comes up as a x4 link. 
*/ + uint8_t allow_x4: 1; + + /** If true, the link is allowed to support 8-lane operation. Software + * will not consider it an error if the link comes up as a x8 link. */ + uint8_t allow_x8: 1; + + /** If true, this link is connected to a device which may or may not + * be present. */ + uint8_t removable: 1; + +}; + +/** Configurations can be issued to configure a char stream interrupt. */ +typedef enum pcie_stream_intr_config_sel_e +{ + /** Interrupt configuration for memory map regions. */ + MEM_MAP_SEL, + + /** Interrupt configuration for push DMAs. */ + PUSH_DMA_SEL, + + /** Interrupt configuration for pull DMAs. */ + PULL_DMA_SEL, +} +pcie_stream_intr_config_sel_t; + + +/** The mmap file offset (PA) of the TRIO config region. */ +#define HV_TRIO_CONFIG_OFFSET \ + ((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_CFG << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) + +/** The maximum size of the TRIO config region. */ +#define HV_TRIO_CONFIG_SIZE \ + (1ULL << TRIO_CFG_REGION_ADDR__REGION_SHIFT) + +/** Size of the config region mapped into client. We can't use + * TRIO_MMIO_ADDRESS_SPACE__OFFSET_WIDTH because it + * will require the kernel to allocate 4GB VA space + * from the VMALLOC region which has a total range + * of 4GB. + */ +#define HV_TRIO_CONFIG_IOREMAP_SIZE \ + ((uint64_t) 1 << TRIO_CFG_REGION_ADDR__PROT_SHIFT) + +/** The mmap file offset (PA) of a scatter queue region. */ +#define HV_TRIO_SQ_OFFSET(queue) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_SQ << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((queue) << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT)) + +/** The maximum size of a scatter queue region. */ +#define HV_TRIO_SQ_SIZE \ + (1ULL << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT) + + +/** The "hardware MMIO region" of the first PIO region. */ +#define HV_TRIO_FIRST_PIO_REGION 8 + +/** The mmap file offset (PA) of a PIO region. 
*/ +#define HV_TRIO_PIO_OFFSET(region) \ + (((unsigned long long)(region) + HV_TRIO_FIRST_PIO_REGION) \ + << TRIO_PIO_REGIONS_ADDR__REGION_SHIFT) + +/** The maximum size of a PIO region. */ +#define HV_TRIO_PIO_SIZE (1ULL << TRIO_PIO_REGIONS_ADDR__ADDR_WIDTH) + + +/** The mmap file offset (PA) of a push DMA region. */ +#define HV_TRIO_PUSH_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PUSH_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The mmap file offset (PA) of a pull DMA region. */ +#define HV_TRIO_PULL_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PULL_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PULL_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The maximum size of a DMA region. */ +#define HV_TRIO_DMA_REGION_SIZE \ + (1ULL << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT) + + +/** The mmap file offset (PA) of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_OFFSET(map) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_MEM << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((map) << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT)) + +/** The maximum size of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_SIZE \ + (1ULL << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT) + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_TRIO_ALLOC_FIXED 0x01 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SHIFT 12 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SIZE (1ull << HV_TRIO_PAGE_SHIFT) + + +/* Specify all PCIe port properties for a TRIO. */ +struct pcie_trio_ports_property +{ + struct pcie_port_property ports[TILEGX_TRIO_PCIES]; + + /** Set if this TRIO belongs to a Gx72 device. */ + uint8_t is_gx72; +}; + +/* Flags indicating traffic class. 
*/ +#define HV_TRIO_FLAG_TC_SHIFT 4 +#define HV_TRIO_FLAG_TC_RMASK 0xf +#define HV_TRIO_FLAG_TC(N) \ + ((((N) & HV_TRIO_FLAG_TC_RMASK) + 1) << HV_TRIO_FLAG_TC_SHIFT) + +/* Flags indicating virtual functions. */ +#define HV_TRIO_FLAG_VFUNC_SHIFT 8 +#define HV_TRIO_FLAG_VFUNC_RMASK 0xff +#define HV_TRIO_FLAG_VFUNC(N) \ + ((((N) & HV_TRIO_FLAG_VFUNC_RMASK) + 1) << HV_TRIO_FLAG_VFUNC_SHIFT) + + +/* Flag indicating an ordered PIO region. */ +#define HV_TRIO_PIO_FLAG_ORDERED (1 << 16) + +/* Flags indicating special types of PIO regions. */ +#define HV_TRIO_PIO_FLAG_SPACE_SHIFT 17 +#define HV_TRIO_PIO_FLAG_SPACE_MASK (0x3 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_CONFIG_SPACE (0x1 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_IO_SPACE (0x2 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) + + +#endif /* _SYS_HV_DRV_TRIO_INTF_H */ diff --git a/arch/tile/include/hv/drv_uart_intf.h b/arch/tile/include/hv/drv_uart_intf.h new file mode 100644 index 00000000000..f5379e2404f --- /dev/null +++ b/arch/tile/include/hv/drv_uart_intf.h @@ -0,0 +1,33 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the UART driver. + */ + +#ifndef _SYS_HV_DRV_UART_INTF_H +#define _SYS_HV_DRV_UART_INTF_H + +#include <arch/uart.h> + +/** Number of UART ports supported. */ +#define TILEGX_UART_NR 2 + +/** The mmap file offset (PA) of the UART MMIO region. */ +#define HV_UART_MMIO_OFFSET 0 + +/** The maximum size of the UARTs MMIO region (64K Bytes). 
*/ +#define HV_UART_MMIO_SIZE (1UL << 16) + +#endif /* _SYS_HV_DRV_UART_INTF_H */ diff --git a/arch/tile/include/hv/drv_usb_host_intf.h b/arch/tile/include/hv/drv_usb_host_intf.h new file mode 100644 index 00000000000..24ce774a3f1 --- /dev/null +++ b/arch/tile/include/hv/drv_usb_host_intf.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the USB host driver. + */ + +#ifndef _SYS_HV_DRV_USB_HOST_INTF_H +#define _SYS_HV_DRV_USB_HOST_INTF_H + +#include <arch/usb_host.h> + + +/** Offset for the EHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_EHCI ((uint64_t) USB_HOST_HCCAPBASE_REG) + +/** Offset for the OHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_OHCI ((uint64_t) USB_HOST_OHCD_HC_REVISION_REG) + +/** Size of the register MMIO region. This turns out to be the same for + * both EHCI and OHCI. */ +#define HV_USB_HOST_MMIO_SIZE ((uint64_t) 0x1000) + +/** The number of service domains supported by the USB host shim. */ +#define HV_USB_HOST_NUM_SVC_DOM 1 + + +#endif /* _SYS_HV_DRV_USB_HOST_INTF_H */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h index f13188ac281..2a20b266d94 100644 --- a/arch/tile/include/hv/drv_xgbe_intf.h +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -460,7 +460,7 @@ typedef void* lepp_comp_t; * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for * our page size of exactly 65536. We add one for a "body" fragment. 
*/ -#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) +#define LEPP_MAX_FRAGS (65536 / HV_DEFAULT_PAGE_SIZE_SMALL + 2 + 1) /** Total number of bytes needed for an lepp_tso_cmd_t. */ #define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index ee41bca4c8c..dfcdeb61ba3 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -17,13 +17,11 @@ * The hypervisor's public API. */ -#ifndef _TILE_HV_H -#define _TILE_HV_H +#ifndef _HV_HV_H +#define _HV_HV_H #include <arch/chip.h> -#include <hv/pagesize.h> - /* Linux builds want unsigned long constants, but assembler wants numbers */ #ifdef __ASSEMBLER__ /** One, for assembler */ @@ -44,15 +42,45 @@ */ #define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN) -/** The size of small pages, in bytes. This value should be verified +/** The log2 of the initial size of small pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_SMALL. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_SMALL 16 + +/** The initial size of small pages, in bytes. This value should be verified * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). + * It may also be modified when installing a new context. */ -#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL) +#define HV_DEFAULT_PAGE_SIZE_SMALL \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_SMALL) -/** The size of large pages, in bytes. This value should be verified +/** The log2 of the initial size of large pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_LARGE. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_LARGE 24 + +/** The initial size of large pages, in bytes. This value should be verified * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). + * It may also be modified when installing a new context. 
+ */ +#define HV_DEFAULT_PAGE_SIZE_LARGE \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE) + +#if CHIP_VA_WIDTH() > 32 + +/** The log2 of the initial size of jumbo pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_JUMBO. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32 + +/** The initial size of jumbo pages, in bytes. This value should + * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO). + * It may also be modified when installing a new context. */ -#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE) +#define HV_DEFAULT_PAGE_SIZE_JUMBO \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO) + +#endif /** The log2 of the granularity at which page tables must be aligned; * in other words, the CPA for a page table must have this many zero @@ -79,7 +107,22 @@ #define HV_DISPATCH_ENTRY_SIZE 32 /** Version of the hypervisor interface defined by this file */ -#define _HV_VERSION 11 +#define _HV_VERSION 13 + +/** Last version of the hypervisor interface with old hv_init() ABI. + * + * The change from version 12 to version 13 corresponds to launching + * the client by default at PL2 instead of PL1 (corresponding to the + * hv itself running at PL3 instead of PL2). To make this explicit, + * the hv_init() API was also extended so the client can report its + * desired PL, resulting in a more helpful failure diagnostic. If you + * call hv_init() with _HV_VERSION_OLD_HV_INIT and omit the client_pl + * argument, the hypervisor will assume client_pl = 1. + * + * Note that this is a deprecated solution and we do not expect to + * support clients of the Tilera hypervisor running at PL1 indefinitely. + */ +#define _HV_VERSION_OLD_HV_INIT 12 /* Index into hypervisor interface dispatch code blocks. 
* @@ -272,8 +315,14 @@ #define HV_DISPATCH_GET_IPI_PTE 56 #endif +/** hv_set_pte_super_shift */ +#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57 + +/** hv_console_set_ipi */ +#define HV_DISPATCH_CONSOLE_SET_IPI 63 + /** One more than the largest dispatch value */ -#define _HV_DISPATCH_END 57 +#define _HV_DISPATCH_END 64 #ifndef __ASSEMBLER__ @@ -346,7 +395,11 @@ typedef int HV_Errno; #ifndef __ASSEMBLER__ /** Pass HV_VERSION to hv_init to request this version of the interface. */ -typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber; +typedef enum { + HV_VERSION = _HV_VERSION, + HV_VERSION_OLD_HV_INIT = _HV_VERSION_OLD_HV_INIT, + +} HV_VersionNumber; /** Initializes the hypervisor. * @@ -354,9 +407,11 @@ typedef enum { HV_VERSION = _HV_VERSION } HV_VersionNumber; * that this program expects, typically HV_VERSION. * @param chip_num Architecture number of the chip the client was built for. * @param chip_rev_num Revision number of the chip the client was built for. + * @param client_pl Privilege level the client is built for + * (not required if interface_version_number == HV_VERSION_OLD_HV_INIT). */ void hv_init(HV_VersionNumber interface_version_number, - int chip_num, int chip_rev_num); + int chip_num, int chip_rev_num, int client_pl); /** Queries we can make for hv_sysconf(). @@ -393,7 +448,18 @@ typedef enum { * that the temperature has hit an upper limit and is no longer being * accurately tracked. */ - HV_SYSCONF_BOARD_TEMP = 6 + HV_SYSCONF_BOARD_TEMP = 6, + + /** Legal page size bitmask for hv_install_context(). + * For example, if 16KB and 64KB small pages are supported, + * it would return "HV_CTX_PG_SM_16K | HV_CTX_PG_SM_64K". + */ + HV_SYSCONF_VALID_PAGE_SIZES = 7, + + /** The size of jumbo pages, in bytes. + * If no jumbo pages are available, zero will be returned. + */ + HV_SYSCONF_PAGE_SIZE_JUMBO = 8, } HV_SysconfQuery; @@ -466,14 +532,36 @@ typedef enum { HV_CONFSTR_SWITCH_CONTROL = 14, /** Chip revision level. 
*/ - HV_CONFSTR_CHIP_REV = 15 + HV_CONFSTR_CHIP_REV = 15, + + /** CPU module part number. */ + HV_CONFSTR_CPUMOD_PART_NUM = 16, + + /** CPU module serial number. */ + HV_CONFSTR_CPUMOD_SERIAL_NUM = 17, + + /** CPU module revision level. */ + HV_CONFSTR_CPUMOD_REV = 18, + + /** Human-readable CPU module description. */ + HV_CONFSTR_CPUMOD_DESC = 19, + + /** Per-tile hypervisor statistics. When this identifier is specified, + * the hv_confstr call takes two extra arguments. The first is the + * HV_XY_TO_LOTAR of the target tile's coordinates. The second is + * a flag word. The only current flag is the lowest bit, which means + * "zero out the stats instead of retrieving them"; in this case the + * buffer and buffer length are ignored. */ + HV_CONFSTR_HV_STATS = 20 } HV_ConfstrQuery; /** Query a configuration string from the hypervisor. * * @param query Identifier for the specific string to be retrieved - * (HV_CONFSTR_xxx). + * (HV_CONFSTR_xxx). Some strings may require or permit extra + * arguments to be appended which select specific objects to be + * described; see the string descriptions above. * @param buf Buffer in which to place the string. * @param len Length of the buffer. * @return If query is valid, then the length of the corresponding string, @@ -481,7 +569,7 @@ typedef enum { * was truncated. If query is invalid, HV_EINVAL. If the specified * buffer is not writable by the client, HV_EFAULT. */ -int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len); +int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len, ...); /** Tile coordinate */ typedef struct @@ -505,6 +593,30 @@ typedef struct */ int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte); +/** Configure the console interrupt. + * + * When the console client interrupt is enabled, the hypervisor will + * deliver the specified IPI to the client in the following situations: + * + * - The console has at least one character available for input. 
+ * + * - The console can accept new characters for output, and the last call + * to hv_console_write() did not write all of the characters requested + * by the client. + * + * Note that in some system configurations, console interrupt will not + * be available; clients should be prepared for this routine to fail and + * to fall back to periodic console polling in that case. + * + * @param ipi Index of the IPI register which will receive the interrupt. + * @param event IPI event number for console interrupt. If less than 0, + * disable the console IPI interrupt. + * @param coord Tile to be targeted for console interrupt. + * @return 0 on success, otherwise, HV_EINVAL if illegal parameter, + * HV_ENOTSUP if console interrupts are not available. + */ +int hv_console_set_ipi(int ipi, int event, HV_Coord coord); + #else /* !CHIP_HAS_IPI() */ /** A set of interrupts. */ @@ -641,6 +753,12 @@ void hv_set_rtc(HV_RTCTime time); * new page table does not need to contain any mapping for the * hv_install_context address itself. * + * At most one HV_CTX_PG_SM_* flag may be specified in "flags"; + * if multiple flags are specified, HV_EINVAL is returned. + * Specifying none of the flags results in using the default page size. + * All cores participating in a given client must request the same + * page size, or the results are undefined. + * * @param page_table Root of the page table. * @param access PTE providing info on how to read the page table. This * value must be consistent between multiple tiles sharing a page table, @@ -659,8 +777,36 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, #define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from PL0. */ +#define HV_CTX_PG_SM_4K 0x10 /**< Use 4K small pages, if available. */ +#define HV_CTX_PG_SM_16K 0x20 /**< Use 16K small pages, if available. */ +#define HV_CTX_PG_SM_64K 0x40 /**< Use 64K small pages, if available. 
*/ +#define HV_CTX_PG_SM_MASK 0xf0 /**< Mask of all possible small pages. */ + #ifndef __ASSEMBLER__ + +/** Set the number of pages ganged together by HV_PTE_SUPER at a + * particular level of the page table. + * + * The current TILE-Gx hardware only supports powers of four + * (i.e. log2_count must be a multiple of two), and the requested + * "super" page size must be less than the span of the next level in + * the page table. The largest size that can be requested is 64GB. + * + * The shift value is initially "0" for all page table levels, + * indicating that the HV_PTE_SUPER bit is effectively ignored. + * + * If you change the count from one non-zero value to another, the + * hypervisor will flush the entire TLB and TSB to avoid confusion. + * + * @param level Page table level (0, 1, or 2) + * @param log2_count Base-2 log of the number of pages to gang together, + * i.e. how much to shift left the base page size for the super page size. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_set_pte_super_shift(int level, int log2_count); + + /** Value returned from hv_inquire_context(). */ typedef struct { @@ -1230,11 +1376,14 @@ HV_Errno hv_set_command_line(HV_VirtAddr buf, int length); * with the existing priority pages) or "red/black" (if they don't). * The bitmask provides information on which parts of the cache * have been used for pinned pages so far on this tile; if (1 << N) - * appears in the bitmask, that indicates that a page has been marked - * "priority" whose PFN equals N, mod 8. + * appears in the bitmask, that indicates that a 4KB region of the + * cache starting at (N * 4KB) is in use by a "priority" page. + * The portion of cache used by a particular page can be computed + * by taking the page's PA, modulo CHIP_L2_CACHE_SIZE(), and setting + * all the "4KB" bits corresponding to the actual page size. 
* @param bitmask A bitmap of priority page set values */ -void hv_set_caching(unsigned int bitmask); +void hv_set_caching(unsigned long bitmask); /** Zero out a specified number of pages. @@ -1843,12 +1992,12 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, #define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ #define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ #define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ - /* Bits 13-15 are reserved for + /* Bits 13-14 are reserved for future use. */ +#define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */ #define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ #define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ - /* Bit 19 is reserved for - future use. */ +#define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */ #define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits of word */ #define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */ @@ -1861,15 +2010,6 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, of word */ #define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */ -/** Position of the PFN field within the PTE (subset of the PTFN). */ -#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \ - HV_LOG2_PAGE_TABLE_ALIGN)) - -/** Length of the PFN field within the PTE (subset of the PTFN). */ -#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \ - (HV_LOG2_PAGE_SIZE_SMALL - \ - HV_LOG2_PAGE_TABLE_ALIGN)) - /* * Legal values for the PTE's mode field */ @@ -1949,7 +2089,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, /** Does this PTE map a page? * - * If this bit is set in the level-1 page table, the entry should be + * If this bit is set in a level-0 page table, the entry should be + * interpreted as a level-2 page table entry mapping a jumbo page. 
+ * + * If this bit is set in a level-1 page table, the entry should be * interpreted as a level-2 page table entry mapping a large page. * * This bit should not be modified by the client while PRESENT is set, as @@ -1959,6 +2102,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, */ #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) +/** Does this PTE implicitly reference multiple pages? + * + * If this bit is set in the page table (either in the level-2 page table, + * or in a higher level page table in conjunction with the PAGE bit) + * then the PTE specifies a range of contiguous pages, not a single page. + * The hv_set_pte_super_shift() allows you to specify the count for + * each level of the page table. + * + * Note: this bit is not supported on TILEPro systems. + */ +#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER) + /** Is this a global (non-ASID) mapping? * * If this bit is set, the translations established by this PTE will @@ -2038,6 +2193,13 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, */ #define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1) +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT2 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT2) + /** Non-coherent (NC) bit in PTE. * * If this bit is set, the mapping that is set up will be non-coherent @@ -2170,8 +2332,10 @@ hv_pte_clear_##name(HV_PTE pte) \ */ _HV_BIT(present, PRESENT) _HV_BIT(page, PAGE) +_HV_BIT(super, SUPER) _HV_BIT(client0, CLIENT0) _HV_BIT(client1, CLIENT1) +_HV_BIT(client2, CLIENT2) _HV_BIT(migrating, MIGRATING) _HV_BIT(nc, NC) _HV_BIT(readable, READABLE) @@ -2214,40 +2378,11 @@ hv_pte_set_mode(HV_PTE pte, unsigned int val) * * This field contains the upper bits of the CPA (client physical * address) of the target page; the complete CPA is this field with - * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it. 
- * - * For PTEs in a level-1 page table where the Page bit is set, the - * CPA must be aligned modulo the large page size. - */ -static __inline unsigned int -hv_pte_get_pfn(const HV_PTE pte) -{ - return pte.val >> HV_PTE_INDEX_PFN; -} - - -/** Set the page frame number into a PTE. See hv_pte_get_pfn. */ -static __inline HV_PTE -hv_pte_set_pfn(HV_PTE pte, unsigned int val) -{ - /* - * Note that the use of "PTFN" in the next line is intentional; we - * don't want any garbage lower bits left in that field. - */ - pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN); - pte.val |= (__hv64) val << HV_PTE_INDEX_PFN; - return pte; -} - -/** Get the page table frame number from the PTE. - * - * This field contains the upper bits of the CPA (client physical - * address) of the target page table; the complete CPA is this field with - * with HV_PAGE_TABLE_ALIGN zero bits appended to it. + * HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it. * - * For PTEs in a level-1 page table when the Page bit is not set, the - * CPA must be aligned modulo the sticter of HV_PAGE_TABLE_ALIGN and - * the level-2 page table size. + * For all PTEs in the lowest-level page table, and for all PTEs with + * the Page bit set in all page tables, the CPA must be aligned modulo + * the relevant page size. */ static __inline unsigned long hv_pte_get_ptfn(const HV_PTE pte) @@ -2255,7 +2390,6 @@ hv_pte_get_ptfn(const HV_PTE pte) return pte.val >> HV_PTE_INDEX_PTFN; } - /** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */ static __inline HV_PTE hv_pte_set_ptfn(HV_PTE pte, unsigned long val) @@ -2265,6 +2399,20 @@ hv_pte_set_ptfn(HV_PTE pte, unsigned long val) return pte; } +/** Get the client physical address from the PTE. See hv_pte_set_ptfn. */ +static __inline HV_PhysAddr +hv_pte_get_pa(const HV_PTE pte) +{ + return (__hv64) hv_pte_get_ptfn(pte) << HV_LOG2_PAGE_TABLE_ALIGN; +} + +/** Set the client physical address into a PTE. See hv_pte_get_ptfn. 
*/ +static __inline HV_PTE +hv_pte_set_pa(HV_PTE pte, HV_PhysAddr pa) +{ + return hv_pte_set_ptfn(pte, pa >> HV_LOG2_PAGE_TABLE_ALIGN); +} + /** Get the remote tile caching this page. * @@ -2300,28 +2448,20 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val) #endif /* !__ASSEMBLER__ */ -/** Converts a client physical address to a pfn. */ -#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL) - -/** Converts a pfn to a client physical address. */ -#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL) - /** Converts a client physical address to a ptfn. */ #define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN) /** Converts a ptfn to a client physical address. */ #define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN) -/** Converts a ptfn to a pfn. */ -#define HV_PTFN_TO_PFN(p) \ - ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) - -/** Converts a pfn to a ptfn. */ -#define HV_PFN_TO_PTFN(p) \ - ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN)) - #if CHIP_VA_WIDTH() > 32 +/* + * Note that we currently do not allow customizing the page size + * of the L0 pages, but fix them at 4GB, so we do not use the + * "_HV_xxx" nomenclature for the L0 macros. 
+ */ + /** Log number of HV_PTE entries in L0 page table */ #define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN) @@ -2351,69 +2491,104 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val) #endif /* CHIP_VA_WIDTH() > 32 */ /** Log number of HV_PTE entries in L1 page table */ -#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE) +#define _HV_LOG2_L1_ENTRIES(log2_page_size_large) \ + (HV_LOG2_L1_SPAN - log2_page_size_large) /** Number of HV_PTE entries in L1 page table */ -#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES) +#define _HV_L1_ENTRIES(log2_page_size_large) \ + (1 << _HV_LOG2_L1_ENTRIES(log2_page_size_large)) /** Log size of L1 page table in bytes */ -#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES) +#define _HV_LOG2_L1_SIZE(log2_page_size_large) \ + (HV_LOG2_PTE_SIZE + _HV_LOG2_L1_ENTRIES(log2_page_size_large)) /** Size of L1 page table in bytes */ -#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE) +#define _HV_L1_SIZE(log2_page_size_large) \ + (1 << _HV_LOG2_L1_SIZE(log2_page_size_large)) /** Log number of HV_PTE entries in level-2 page table */ -#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL) +#define _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (log2_page_size_large - log2_page_size_small) /** Number of HV_PTE entries in level-2 page table */ -#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES) +#define _HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (1 << _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) /** Log size of level-2 page table in bytes */ -#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES) +#define _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (HV_LOG2_PTE_SIZE + \ + _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) /** Size of level-2 page table in bytes */ -#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE) +#define _HV_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (1 << 
_HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small)) #ifdef __ASSEMBLER__ #if CHIP_VA_WIDTH() > 32 /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large) & (_HV_L1_ENTRIES(log2_page_size_large) - 1)) #else /* CHIP_VA_WIDTH() > 32 */ /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_LARGE)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large)) #endif /* CHIP_VA_WIDTH() > 32 */ /** Index in level-2 page table for a specific VA */ -#define HV_L2_INDEX(va) \ - (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) #else /* __ASSEMBLER __ */ #if CHIP_VA_WIDTH() > 32 /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large) & \ + (_HV_L1_ENTRIES(log2_page_size_large) - 1)) #else /* CHIP_VA_WIDTH() > 32 */ /** Index in L1 for a specific VA */ -#define HV_L1_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE)) +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large)) #endif /* CHIP_VA_WIDTH() > 32 */ /** Index in level-2 page table for a specific VA */ -#define HV_L2_INDEX(va) \ - (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((HV_VirtAddr)(va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) #endif /* __ASSEMBLER __ */ -#endif /* _TILE_HV_H */ +/** Position of the PFN field within the PTE 
(subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN(log2_page_size) \ + (HV_PTE_INDEX_PTFN + (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Length of the PFN field within the PTE (subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN_BITS(log2_page_size) \ + (HV_PTE_INDEX_PTFN_BITS - (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a client physical address to a pfn. */ +#define _HV_CPA_TO_PFN(p, log2_page_size) ((p) >> log2_page_size) + +/** Converts a pfn to a client physical address. */ +#define _HV_PFN_TO_CPA(p, log2_page_size) \ + (((HV_PhysAddr)(p)) << log2_page_size) + +/** Converts a ptfn to a pfn. */ +#define _HV_PTFN_TO_PFN(p, log2_page_size) \ + ((p) >> (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a pfn to a ptfn. */ +#define _HV_PFN_TO_PTFN(p, log2_page_size) \ + ((p) << (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +#endif /* _HV_HV_H */ diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h new file mode 100644 index 00000000000..ddf1604482b --- /dev/null +++ b/arch/tile/include/hv/iorpc.h @@ -0,0 +1,714 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _HV_IORPC_H_ +#define _HV_IORPC_H_ + +/** + * + * Error codes and struct definitions for the IO RPC library. + * + * The hypervisor's IO RPC component provides a convenient way for + * driver authors to proxy system calls between user space, linux, and + * the hypervisor driver. 
The core of the system is a set of Python + * files that take ".idl" files as input and generate the following + * source code: + * + * - _rpc_call() routines for use in userspace IO libraries. These + * routines take an argument list specified in the .idl file, pack the + * arguments into a buffer, and read or write that buffer via the + * Linux iorpc driver. + * + * - dispatch_read() and dispatch_write() routines that hypervisor + * drivers can use to implement most of their dev_pread() and + * dev_pwrite() methods. These routines decode the incoming parameter + * blob, permission check and translate parameters where appropriate, + * and then invoke a callback routine for whichever RPC call has + * arrived. The driver simply implements the set of callback + * routines. + * + * The IO RPC system also includes the Linux 'iorpc' driver, which + * proxies calls between the userspace library and the hypervisor + * driver. The Linux driver is almost entirely device agnostic; it + * watches for special flags indicating cases where a memory buffer + * address might need to be translated, etc. As a result, driver + * writers can avoid many of the problem cases related to registering + * hardware resources like memory pages or interrupts. However, the + * drivers must be careful to obey the conventions documented below in + * order to work properly with the generic Linux iorpc driver. + * + * @section iorpc_domains Service Domains + * + * All iorpc-based drivers must support a notion of service domains. + * A service domain is basically an application context - state + * indicating resources that are allocated to that particular app + * which it may access and (perhaps) other applications may not + * access. Drivers can support any number of service domains they + * choose. 
In some cases the design is limited by a number of service + * domains supported by the IO hardware; in other cases the service + * domains are a purely software concept and the driver chooses a + * maximum number of domains based on how much state memory it is + * willing to preallocate. + * + * For example, the mPIPE driver only supports as many service domains + * as are supported by the mPIPE hardware. This limitation is + * required because the hardware implements its own MMIO protection + * scheme to allow large MMIO mappings while still protecting small + * register ranges within the page that should only be accessed by the + * hypervisor. + * + * In contrast, drivers with no hardware service domain limitations + * (for instance the TRIO shim) can implement an arbitrary number of + * service domains. In these cases, each service domain is limited to + * a carefully restricted set of legal MMIO addresses if necessary to + * keep one application from corrupting another application's state. + * + * @section iorpc_conventions System Call Conventions + * + * The driver's open routine is responsible for allocating a new + * service domain for each hv_dev_open() call. By convention, the + * return value from open() should be the service domain number on + * success, or GXIO_ERR_NO_SVC_DOM if no more service domains are + * available. + * + * The implementations of hv_dev_pread() and hv_dev_pwrite() are + * responsible for validating the devhdl value passed up by the + * client. Since the device handle returned by hv_dev_open() should + * embed the positive service domain number, drivers should make sure + * that DRV_HDL2BITS(devhdl) is a legal service domain. If the client + * passes an illegal service domain number, the routine should return + * GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been + * validated, the driver can copy to/from the client buffer and call + * the dispatch_read() or dispatch_write() methods created by the RPC + * generator. 
+ * + * The hv_dev_close() implementation should reset all service domain + * state and put the service domain back on a free list for + * reallocation by a future application. In most cases, this will + * require executing a hardware reset or drain flow and denying any + * MMIO regions that were created for the service domain. + * + * @section iorpc_data Special Data Types + * + * The .idl file syntax allows the creation of syscalls with special + * parameters that require permission checks or translations as part + * of the system call path. Because of limitations in the code + * generator, APIs are generally limited to just one of these special + * parameters per system call, and they are sometimes required to be + * the first or last parameter to the call. Special parameters + * include: + * + * @subsection iorpc_mem_buffer MEM_BUFFER + * + * The MEM_BUFFER() datatype allows user space to "register" memory + * buffers with a device. Registering memory accomplishes two tasks: + * Linux keeps track of all buffers that might be modified by a + * hardware device, and the hardware device drivers bind registered + * buffers to particular hardware resources like ingress NotifRings. + * The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB, + * ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have + * certain alignment or that the user should be able to pass a "memory + * flags" word specifying attributes like nt_hint or IO cache pinning. + * The parser will accept multiple MEM_BUFFER() flags. + * + * Implementations must obey the following conventions when + * registering memory buffers via the iorpc flow. These rules are a + * result of the Linux driver implementation, which needs to keep + * track of how many times a particular page has been registered with + * the hardware so that it can release the page when all those + * registrations are cleared. 
+ * + * - Memory registrations that refer to a resource which has already + * been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an + * error to register memory twice without resetting (i.e. closing) the + * resource in between. This convention keeps the Linux driver from + * having to track which particular devices a page is bound to. + * + * - At present, a memory registration is only cleared when the + * service domain is reset. In this case, the Linux driver simply + * closes the HV device file handle and then decrements the reference + * counts of all pages that were previously registered with the + * device. + * + * - In the future, we may add a mechanism for unregistering memory. + * One possible implementation would require that the user specify + * which buffer is currently registered. The HV would then verify + * that that page was actually the one currently mapped and return + * success or failure to Linux, which would then only decrement the + * page reference count if the addresses were mapped. Another scheme + * might allow Linux to pass a token to the HV to be returned when the + * resource is unmapped. + * + * @subsection iorpc_interrupt INTERRUPT + * + * The INTERRUPT .idl datatype allows the client to bind hardware + * interrupts to a particular combination of IPI parameters - CPU, IPI + * PL, and event bit number. This data is passed via a special + * datatype so that the Linux driver can validate the CPU and PL and + * the HV generic iorpc code can translate client CPUs to real CPUs. + * + * @subsection iorpc_pollfd_setup POLLFD_SETUP + * + * The POLLFD_SETUP .idl datatype allows the client to set up hardware + * interrupt bindings which are received by Linux but which are made + * visible to user processes as state transitions on a file descriptor; + * this allows user processes to use Linux primitives, such as poll(), to + * await particular hardware events. 
This data is passed via a special + * datatype so that the Linux driver may recognize the pollable file + * descriptor and translate it to a set of interrupt target information, + * and so that the HV generic iorpc code can translate client CPUs to real + * CPUs. + * + * @subsection iorpc_pollfd POLLFD + * + * The POLLFD .idl datatype allows manipulation of hardware interrupt + * bindings set up via the POLLFD_SETUP datatype; common operations are + * resetting the state of the requested interrupt events, and unbinding any + * bound interrupts. This data is passed via a special datatype so that + * the Linux driver may recognize the pollable file descriptor and + * translate it to an interrupt identifier previously supplied by the + * hypervisor as the result of an earlier pollfd_setup operation. + * + * @subsection iorpc_blob BLOB + * + * The BLOB .idl datatype allows the client to write an arbitrary + * length string of bytes up to the hypervisor driver. This can be + * useful for passing up large, arbitrarily structured data like + * classifier programs. The iorpc stack takes care of validating the + * buffer VA and CPA as the data passes up to the hypervisor. Unlike + * MEM_BUFFER(), the buffer is not registered - Linux does not bump + * page refcounts and the HV driver should not reuse the buffer once + * the system call is complete. + * + * @section iorpc_translation Translating User Space Calls + * + * The ::iorpc_offset structure describes the formatting of the offset + * that is passed to pread() or pwrite() as part of the generated RPC code. + * When the user calls up to Linux, the rpc code fills in all the fields of + * the offset, including a 16-bit opcode, a 16 bit format indicator, and 32 + * bits of user-specified "sub-offset". The opcode indicates which syscall + * is being requested. The format indicates whether there is a "prefix + * struct" at the start of the memory buffer passed to pwrite(), and if so + * what data is in that prefix struct. 
These prefix structs are used to + * implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange + * to put data that needs translation and permission checks at the start of + * the buffer so that the Linux driver and generic portions of the HV iorpc + * code can easily access the data. The 32 bits of user-specified + * "sub-offset" are most useful for pread() calls where the user needs to + * also pass in a few bits indicating which register to read, etc. + * + * The Linux iorpc driver watches for system calls that contain prefix + * structs so that it can translate parameters and bump reference + * counts as appropriate. It does not (currently) have any knowledge + * of the per-device opcodes - it doesn't care what operation you're + * doing to mPIPE, so long as it can do all the generic book-keeping. + * The hv/iorpc.h header file defines all of the generic encoding bits + * needed to translate iorpc calls without knowing which particular + * opcode is being issued. + * + * @section iorpc_globals Global iorpc Calls + * + * Implementing mmap() required adding some special iorpc syscalls + * that are only called by the Linux driver, never by userspace. + * These include get_mmio_base() and check_mmio_offset(). These + * routines are described in globals.idl and must be included in every + * iorpc driver. By providing these routines in every driver, Linux's + * mmap implementation can easily get the PTE bits it needs and + * validate the PA offset without needing to know the per-device + * opcodes to perform those tasks. + * + * @section iorpc_kernel Supporting gxio APIs in the Kernel + * + * The iorpc code generator also supports generation of kernel code + * implementing the gxio APIs. This capability is currently used by + * the mPIPE network driver, and will likely be used by the TRIO root + * complex and endpoint drivers and perhaps an in-kernel crypto + * driver. 
Each driver that wants to instantiate iorpc calls in the + * kernel needs to generate a kernel version of the generated rpc code + * and (probably) copy any related gxio source files into the kernel. + * The mPIPE driver provides a good example of this pattern. + */ + +#ifdef __KERNEL__ +#include <linux/stddef.h> +#else +#include <stddef.h> +#endif + +#if defined(__HV__) +#include <hv/hypervisor.h> +#elif defined(__KERNEL__) +#include <hv/hypervisor.h> +#include <linux/types.h> +#else +#include <stdint.h> +#endif + + +/** Code indicating translation services required within the RPC path. + * These indicate whether there is a translatable struct at the start + * of the RPC buffer and what information that struct contains. + */ +enum iorpc_format_e +{ + /** No translation required, no prefix struct. */ + IORPC_FORMAT_NONE, + + /** No translation required, no prefix struct, no access to this + * operation from user space. */ + IORPC_FORMAT_NONE_NOUSER, + + /** Prefix struct contains user VA and size. */ + IORPC_FORMAT_USER_MEM, + + /** Prefix struct contains CPA, size, and homing bits. */ + IORPC_FORMAT_KERNEL_MEM, + + /** Prefix struct contains interrupt. */ + IORPC_FORMAT_KERNEL_INTERRUPT, + + /** Prefix struct contains user-level interrupt. */ + IORPC_FORMAT_USER_INTERRUPT, + + /** Prefix struct contains pollfd_setup (interrupt information). */ + IORPC_FORMAT_KERNEL_POLLFD_SETUP, + + /** Prefix struct contains user-level pollfd_setup (file descriptor). */ + IORPC_FORMAT_USER_POLLFD_SETUP, + + /** Prefix struct contains pollfd (interrupt cookie). */ + IORPC_FORMAT_KERNEL_POLLFD, + + /** Prefix struct contains user-level pollfd (file descriptor). */ + IORPC_FORMAT_USER_POLLFD, +}; + + +/** Generate an opcode given format and code. */ +#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE)) + +/** The offset passed through the read() and write() system calls + combines an opcode with 32 bits of user-specified offset. 
*/ +union iorpc_offset +{ +#ifndef __BIG_ENDIAN__ + uint64_t offset; /**< All bits. */ + + struct + { + uint16_t code; /**< RPC code. */ + uint16_t format; /**< iorpc_format_e */ + uint32_t sub_offset; /**< caller-specified offset. */ + }; + + uint32_t opcode; /**< Opcode combines code & format. */ +#else + uint64_t offset; /**< All bits. */ + + struct + { + uint32_t sub_offset; /**< caller-specified offset. */ + uint16_t format; /**< iorpc_format_e */ + uint16_t code; /**< RPC code. */ + }; + + struct + { + uint32_t padding; + uint32_t opcode; /**< Opcode combines code & format. */ + }; +#endif +}; + + +/** Homing and cache hinting bits that can be used by IO devices. */ +struct iorpc_mem_attr +{ + unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). */ + unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */ + unsigned int hfh:1; /**< Uses hash-for-home. */ + unsigned int nt_hint:1; /**< Non-temporal hint. */ + unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */ +}; + +/** Set the nt_hint bit. */ +#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0) + +/** Set the IO pin bit. */ +#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1) + + +/** A structure used to describe memory registration. Different + protection levels describe memory differently, so this union + contains all the different possible descriptions. As a request + moves up the call chain, each layer translates from one + description format to the next. In particular, the Linux iorpc + driver translates user VAs into CPAs and homing parameters. */ +union iorpc_mem_buffer +{ + struct + { + uint64_t va; /**< User virtual address. */ + uint64_t size; /**< Buffer size. */ + unsigned int flags; /**< nt_hint, IO pin. */ + } + user; /**< Buffer as described by user apps. */ + + struct + { + unsigned long long cpa; /**< Client physical address. */ +#if defined(__KERNEL__) || defined(__HV__) + size_t size; /**< Buffer size. */ + HV_PTE pte; /**< PTE describing memory homing. 
*/ +#else + uint64_t size; + uint64_t pte; +#endif + unsigned int flags; /**< nt_hint, IO pin. */ + } + kernel; /**< Buffer as described by kernel. */ + + struct + { + unsigned long long pa; /**< Physical address. */ + size_t size; /**< Buffer size. */ + struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */ + } + hv; /**< Buffer parameters for HV driver. */ +}; + + +/** A structure used to describe interrupts. The format differs slightly + * for user and kernel interrupts. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_interrupt +{ + struct + { + int cpu; /**< CPU. */ + int event; /**< evt_num */ + } + user; /**< Interrupt as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< Interrupt as described by the kernel. */ + +}; + + +/** A structure used to describe interrupts used with poll(). The format + * differs significantly for requests from user to kernel, and kernel to + * hypervisor. As with the mem_buffer_t, translation between the formats + * is done at each level. */ +union iorpc_pollfd_setup +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd_setup as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< pollfd_setup as described by the kernel. */ + +}; + + +/** A structure used to describe previously set up interrupts used with + * poll(). The format differs significantly for requests from user to + * kernel, and kernel to hypervisor. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_pollfd +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd as described by user applications. 
*/ + + struct + { + int cookie; /**< hv cookie returned by the pollfd_setup operation. */ + } + kernel; /**< pollfd as described by the kernel. */ + +}; + + +/** The various iorpc devices use error codes from -1100 to -1299. + * + * This range is distinct from netio (-700 to -799), the hypervisor + * (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099), + * gxcr (-1300 to -1399) and gxpci (-1400 to -1499). + */ +enum gxio_err_e { + + /** Largest iorpc error number. */ + GXIO_ERR_MAX = -1101, + + + /********************************************************/ + /* Generic Error Codes */ + /********************************************************/ + + /** Bad RPC opcode - possible version incompatibility. */ + GXIO_ERR_OPCODE = -1101, + + /** Invalid parameter. */ + GXIO_ERR_INVAL = -1102, + + /** Memory buffer did not meet alignment requirements. */ + GXIO_ERR_ALIGNMENT = -1103, + + /** Memory buffers must be coherent and cacheable. */ + GXIO_ERR_COHERENCE = -1104, + + /** Resource already initialized. */ + GXIO_ERR_ALREADY_INIT = -1105, + + /** No service domains available. */ + GXIO_ERR_NO_SVC_DOM = -1106, + + /** Illegal service domain number. */ + GXIO_ERR_INVAL_SVC_DOM = -1107, + + /** Illegal MMIO address. */ + GXIO_ERR_MMIO_ADDRESS = -1108, + + /** Illegal interrupt binding. */ + GXIO_ERR_INTERRUPT = -1109, + + /** Unreasonable client memory. */ + GXIO_ERR_CLIENT_MEMORY = -1110, + + /** No more IOTLB entries. */ + GXIO_ERR_IOTLB_ENTRY = -1111, + + /** Invalid memory size. */ + GXIO_ERR_INVAL_MEMORY_SIZE = -1112, + + /** Unsupported operation. */ + GXIO_ERR_UNSUPPORTED_OP = -1113, + + /** Insufficient DMA credits. */ + GXIO_ERR_DMA_CREDITS = -1114, + + /** Operation timed out. */ + GXIO_ERR_TIMEOUT = -1115, + + /** No such device or object. */ + GXIO_ERR_NO_DEVICE = -1116, + + /** Device or resource busy. */ + GXIO_ERR_BUSY = -1117, + + /** I/O error. */ + GXIO_ERR_IO = -1118, + + /** Permissions error. 
*/ + GXIO_ERR_PERM = -1119, + + + + /********************************************************/ + /* Test Device Error Codes */ + /********************************************************/ + + /** Illegal register number. */ + GXIO_TEST_ERR_REG_NUMBER = -1120, + + /** Illegal buffer slot. */ + GXIO_TEST_ERR_BUFFER_SLOT = -1121, + + + /********************************************************/ + /* MPIPE Error Codes */ + /********************************************************/ + + + /** Invalid buffer size. */ + GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131, + + /** Cannot allocate buffer stack. */ + GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140, + + /** Invalid buffer stack number. */ + GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141, + + /** Cannot allocate NotifRing. */ + GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142, + + /** Invalid NotifRing number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143, + + /** Cannot allocate NotifGroup. */ + GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144, + + /** Invalid NotifGroup number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145, + + /** Cannot allocate bucket. */ + GXIO_MPIPE_ERR_NO_BUCKET = -1146, + + /** Invalid bucket number. */ + GXIO_MPIPE_ERR_BAD_BUCKET = -1147, + + /** Cannot allocate eDMA ring. */ + GXIO_MPIPE_ERR_NO_EDMA_RING = -1148, + + /** Invalid eDMA ring number. */ + GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149, + + /** Invalid channel number. */ + GXIO_MPIPE_ERR_BAD_CHANNEL = -1150, + + /** Bad configuration. */ + GXIO_MPIPE_ERR_BAD_CONFIG = -1151, + + /** Empty iqueue. */ + GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152, + + /** Empty rules. */ + GXIO_MPIPE_ERR_RULES_EMPTY = -1160, + + /** Full rules. */ + GXIO_MPIPE_ERR_RULES_FULL = -1161, + + /** Corrupt rules. */ + GXIO_MPIPE_ERR_RULES_CORRUPT = -1162, + + /** Invalid rules. */ + GXIO_MPIPE_ERR_RULES_INVALID = -1163, + + /** Classifier is too big. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170, + + /** Classifier is too complex. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171, + + /** Classifier has bad header. 
*/ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172, + + /** Classifier has bad contents. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173, + + /** Classifier encountered invalid symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174, + + /** Classifier encountered invalid bounds. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175, + + /** Classifier encountered invalid relocation. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176, + + /** Classifier encountered undefined symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177, + + + /********************************************************/ + /* TRIO Error Codes */ + /********************************************************/ + + /** Cannot allocate memory map region. */ + GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180, + + /** Invalid memory map region number. */ + GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181, + + /** Cannot allocate scatter queue. */ + GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182, + + /** Invalid scatter queue number. */ + GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183, + + /** Cannot allocate push DMA ring. */ + GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184, + + /** Invalid push DMA ring index. */ + GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185, + + /** Cannot allocate pull DMA ring. */ + GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186, + + /** Invalid pull DMA ring index. */ + GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187, + + /** Cannot allocate PIO region. */ + GXIO_TRIO_ERR_NO_PIO = -1188, + + /** Invalid PIO region index. */ + GXIO_TRIO_ERR_BAD_PIO = -1189, + + /** Cannot allocate ASID. */ + GXIO_TRIO_ERR_NO_ASID = -1190, + + /** Invalid ASID. */ + GXIO_TRIO_ERR_BAD_ASID = -1191, + + + /********************************************************/ + /* MICA Error Codes */ + /********************************************************/ + + /** No such accelerator type. */ + GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220, + + /** Cannot allocate context. 
*/ + GXIO_MICA_ERR_NO_CONTEXT = -1221, + + /** PKA command queue is full, can't add another command. */ + GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222, + + /** PKA result queue is empty, can't get a result from the queue. */ + GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223, + + /********************************************************/ + /* GPIO Error Codes */ + /********************************************************/ + + /** Pin not available. Either the physical pin does not exist, or + * it is reserved by the hypervisor for system usage. */ + GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240, + + /** Pin busy. The pin exists, and is available for use via GXIO, but + * it has been attached by some other process or driver. */ + GXIO_GPIO_ERR_PIN_BUSY = -1241, + + /** Cannot access unattached pin. One or more of the pins being + * manipulated by this call are not attached to the requesting + * context. */ + GXIO_GPIO_ERR_PIN_UNATTACHED = -1242, + + /** Invalid I/O mode for pin. The wiring of the pin in the system + * is such that the I/O mode or electrical control parameters + * requested could cause damage. */ + GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243, + + /** Smallest iorpc error number. 
*/ + GXIO_ERR_MIN = -1299 +}; + + +#endif /* !_HV_IORPC_H_ */ diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild new file mode 100644 index 00000000000..97dfbecec6b --- /dev/null +++ b/arch/tile/include/uapi/arch/Kbuild @@ -0,0 +1,17 @@ +# UAPI Header export list +header-y += abi.h +header-y += chip.h +header-y += chip_tilegx.h +header-y += chip_tilepro.h +header-y += icache.h +header-y += interrupts.h +header-y += interrupts_32.h +header-y += interrupts_64.h +header-y += opcode.h +header-y += opcode_tilegx.h +header-y += opcode_tilepro.h +header-y += sim.h +header-y += sim_def.h +header-y += spr_def.h +header-y += spr_def_32.h +header-y += spr_def_64.h diff --git a/arch/tile/include/arch/abi.h b/arch/tile/include/uapi/arch/abi.h index 8affc76f771..c55a3d43264 100644 --- a/arch/tile/include/arch/abi.h +++ b/arch/tile/include/uapi/arch/abi.h @@ -15,13 +15,78 @@ /** * @file * - * ABI-related register definitions helpful when writing assembly code. + * ABI-related register definitions. */ #ifndef __ARCH_ABI_H__ -#define __ARCH_ABI_H__ -#include <arch/chip.h> +#if !defined __need_int_reg_t && !defined __DOXYGEN__ +# define __ARCH_ABI_H__ +# include <arch/chip.h> +#endif + +/* Provide the basic machine types. */ +#ifndef __INT_REG_BITS + +/** Number of bits in a register. */ +#if defined __tilegx__ +# define __INT_REG_BITS 64 +#elif defined __tilepro__ +# define __INT_REG_BITS 32 +#elif !defined __need_int_reg_t +# include <arch/chip.h> +# define __INT_REG_BITS CHIP_WORD_SIZE() +#else +# error Unrecognized architecture with __need_int_reg_t +#endif + +#if __INT_REG_BITS == 64 + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. */ +typedef unsigned long long __uint_reg_t; + +/** Signed type that can hold a register. */ +typedef long long __int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define __INT_REG_FMT "ll" + +#else + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. 
*/ +typedef unsigned long __uint_reg_t; + +/** Signed type that can hold a register. */ +typedef long __int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define __INT_REG_FMT "l" + +#endif +#endif /* __INT_REG_BITS */ + + +#ifndef __need_int_reg_t + + +#ifndef __ASSEMBLER__ +/** Unsigned type that can hold a register. */ +typedef __uint_reg_t uint_reg_t; + +/** Signed type that can hold a register. */ +typedef __int_reg_t int_reg_t; +#endif + +/** String prefix to use for printf(). */ +#define INT_REG_FMT __INT_REG_FMT + +/** Number of bits in a register. */ +#define INT_REG_BITS __INT_REG_BITS + /* Registers 0 - 55 are "normal", but some perform special roles. */ @@ -59,7 +124,7 @@ * The ABI requires callers to allocate a caller state save area of * this many bytes at the bottom of each stack frame. */ -#define C_ABI_SAVE_AREA_SIZE (2 * (CHIP_WORD_SIZE() / 8)) +#define C_ABI_SAVE_AREA_SIZE (2 * (INT_REG_BITS / 8)) /** * The operand to an 'info' opcode directing the backtracer to not @@ -67,30 +132,10 @@ */ #define INFO_OP_CANNOT_BACKTRACE 2 -#ifndef __ASSEMBLER__ -#if CHIP_WORD_SIZE() > 32 -/** Unsigned type that can hold a register. */ -typedef unsigned long long uint_reg_t; +#endif /* !__need_int_reg_t */ -/** Signed type that can hold a register. */ -typedef long long int_reg_t; - -/** String prefix to use for printf(). */ -#define INT_REG_FMT "ll" - -#elif !defined(__LP64__) /* avoid confusion with LP64 cross-build tools */ - -/** Unsigned type that can hold a register. */ -typedef unsigned long uint_reg_t; - -/** Signed type that can hold a register. */ -typedef long int_reg_t; - -/** String prefix to use for printf(). */ -#define INT_REG_FMT "l" - -#endif -#endif /* __ASSEMBLER__ */ +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_int_reg_t #endif /* !__ARCH_ABI_H__ */ diff --git a/arch/tile/include/arch/chip.h b/arch/tile/include/uapi/arch/chip.h index 926d3db0e91..4c91f90b936 100644 --- a/arch/tile/include/arch/chip.h +++ b/arch/tile/include/uapi/arch/chip.h @@ -12,9 +12,7 @@ * more details. */ -#if __tile_chip__ == 0 -#include <arch/chip_tile64.h> -#elif __tile_chip__ == 1 +#if __tile_chip__ == 1 #include <arch/chip_tilepro.h> #elif defined(__tilegx__) #include <arch/chip_tilegx.h> diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tilegx.h index 261aaba092d..ea8e4f2c948 100644 --- a/arch/tile/include/arch/chip_tile64.h +++ b/arch/tile/include/uapi/arch/chip_tilegx.h @@ -1,5 +1,5 @@ /* - * Copyright 2010 Tilera Corporation. All Rights Reserved. + * Copyright 2011 Tilera Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,7 +15,7 @@ /* * @file * Global header file. - * This header file specifies defines for TILE64. + * This header file specifies defines for TILE-Gx. */ #ifndef __ARCH_CHIP_H__ @@ -26,7 +26,7 @@ * This is intended for cross-compiling; native compilation should * use the predefined __tile_chip__ symbol. */ -#define TILE_CHIP 0 +#define TILE_CHIP 10 /** Specify chip revision. * This provides for the case of a respin of a particular chip type; @@ -37,27 +37,27 @@ #define TILE_CHIP_REV 0 /** The name of this architecture. */ -#define CHIP_ARCH_NAME "tile64" +#define CHIP_ARCH_NAME "tilegx" /** The ELF e_machine type for binaries for this chip. */ -#define CHIP_ELF_TYPE() EM_TILE64 +#define CHIP_ELF_TYPE() EM_TILEGX /** The alternate ELF e_machine type for binaries for this chip. */ -#define CHIP_COMPAT_ELF_TYPE() 0x2506 +#define CHIP_COMPAT_ELF_TYPE() 0x2597 /** What is the native word size of the machine? 
*/ -#define CHIP_WORD_SIZE() 32 +#define CHIP_WORD_SIZE() 64 /** How many bits of a virtual address are used. Extra bits must be * the sign extension of the low bits. */ -#define CHIP_VA_WIDTH() 32 +#define CHIP_VA_WIDTH() 42 /** How many bits are in a physical address? */ -#define CHIP_PA_WIDTH() 36 +#define CHIP_PA_WIDTH() 40 /** Size of the L2 cache, in bytes. */ -#define CHIP_L2_CACHE_SIZE() 65536 +#define CHIP_L2_CACHE_SIZE() 262144 /** Log size of an L2 cache line in bytes. */ #define CHIP_L2_LOG_LINE_SIZE() 6 @@ -66,13 +66,13 @@ #define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE()) /** Associativity of the L2 cache. */ -#define CHIP_L2_ASSOC() 2 +#define CHIP_L2_ASSOC() 8 /** Size of the L1 data cache, in bytes. */ -#define CHIP_L1D_CACHE_SIZE() 8192 +#define CHIP_L1D_CACHE_SIZE() 32768 /** Log size of an L1 data cache line in bytes. */ -#define CHIP_L1D_LOG_LINE_SIZE() 4 +#define CHIP_L1D_LOG_LINE_SIZE() 6 /** Size of an L1 data cache line, in bytes. */ #define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE()) @@ -81,7 +81,7 @@ #define CHIP_L1D_ASSOC() 2 /** Size of the L1 instruction cache, in bytes. */ -#define CHIP_L1I_CACHE_SIZE() 8192 +#define CHIP_L1I_CACHE_SIZE() 32768 /** Log size of an L1 instruction cache line in bytes. */ #define CHIP_L1I_LOG_LINE_SIZE() 6 @@ -90,148 +90,148 @@ #define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE()) /** Associativity of the L1 instruction cache. */ -#define CHIP_L1I_ASSOC() 1 +#define CHIP_L1I_ASSOC() 2 /** Stride with which flush instructions must be issued. */ #define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE() /** Stride with which inv instructions must be issued. */ -#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE() +#define CHIP_INV_STRIDE() CHIP_L2_LINE_SIZE() /** Stride with which finv instructions must be issued. */ -#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE() +#define CHIP_FINV_STRIDE() CHIP_L2_LINE_SIZE() /** Can the local cache coherently cache data that is homed elsewhere? 
*/ -#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0 +#define CHIP_HAS_COHERENT_LOCAL_CACHE() 1 /** How many simultaneous outstanding victims can the L2 cache have? */ -#define CHIP_MAX_OUTSTANDING_VICTIMS() 2 +#define CHIP_MAX_OUTSTANDING_VICTIMS() 128 /** Does the TLB support the NC and NOALLOC bits? */ -#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0 +#define CHIP_HAS_NC_AND_NOALLOC_BITS() 1 /** Does the chip support hash-for-home caching? */ -#define CHIP_HAS_CBOX_HOME_MAP() 0 +#define CHIP_HAS_CBOX_HOME_MAP() 1 /** Number of entries in the chip's home map tables. */ -/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */ +#define CHIP_CBOX_HOME_MAP_SIZE() 128 /** Do uncacheable requests miss in the cache regardless of whether * there is matching data? */ -#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0 +#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 1 /** Does the mf instruction wait for victims? */ -#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1 +#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 0 /** Does the chip have an "inv" instruction that doesn't also flush? */ -#define CHIP_HAS_INV() 0 +#define CHIP_HAS_INV() 1 /** Does the chip have a "wh64" instruction? */ -#define CHIP_HAS_WH64() 0 +#define CHIP_HAS_WH64() 1 /** Does this chip have a 'dword_align' instruction? */ #define CHIP_HAS_DWORD_ALIGN() 0 /** Number of performance counters. */ -#define CHIP_PERFORMANCE_COUNTERS() 2 +#define CHIP_PERFORMANCE_COUNTERS() 4 /** Does this chip have auxiliary performance counters? */ -#define CHIP_HAS_AUX_PERF_COUNTERS() 0 +#define CHIP_HAS_AUX_PERF_COUNTERS() 1 /** Is the CBOX_MSR1 SPR supported? */ #define CHIP_HAS_CBOX_MSR1() 0 /** Is the TILE_RTF_HWM SPR supported? */ -#define CHIP_HAS_TILE_RTF_HWM() 0 +#define CHIP_HAS_TILE_RTF_HWM() 1 /** Is the TILE_WRITE_PENDING SPR supported? */ #define CHIP_HAS_TILE_WRITE_PENDING() 0 /** Is the PROC_STATUS SPR supported? 
*/ -#define CHIP_HAS_PROC_STATUS_SPR() 0 +#define CHIP_HAS_PROC_STATUS_SPR() 1 /** Is the DSTREAM_PF SPR supported? */ -#define CHIP_HAS_DSTREAM_PF() 0 +#define CHIP_HAS_DSTREAM_PF() 1 /** Log of the number of mshims we have. */ #define CHIP_LOG_NUM_MSHIMS() 2 /** Are the bases of the interrupt vector areas fixed? */ -#define CHIP_HAS_FIXED_INTVEC_BASE() 1 +#define CHIP_HAS_FIXED_INTVEC_BASE() 0 /** Are the interrupt masks split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_INTR_MASK() 1 +#define CHIP_HAS_SPLIT_INTR_MASK() 0 /** Is the cycle count split up into 2 SPRs? */ -#define CHIP_HAS_SPLIT_CYCLE() 1 +#define CHIP_HAS_SPLIT_CYCLE() 0 /** Does the chip have a static network? */ -#define CHIP_HAS_SN() 1 +#define CHIP_HAS_SN() 0 /** Does the chip have a static network processor? */ -#define CHIP_HAS_SN_PROC() 1 +#define CHIP_HAS_SN_PROC() 0 /** Size of the L1 static network processor instruction cache, in bytes. */ -#define CHIP_L1SNI_CACHE_SIZE() 2048 +/* #define CHIP_L1SNI_CACHE_SIZE() -- does not apply to chip 10 */ /** Does the chip have DMA support in each tile? */ -#define CHIP_HAS_TILE_DMA() 1 +#define CHIP_HAS_TILE_DMA() 0 /** Does the chip have the second revision of the directly accessible * dynamic networks? This encapsulates a number of characteristics, * including the absence of the catch-all, the absence of inline message * tags, the absence of support for network context-switching, and so on. */ -#define CHIP_HAS_REV1_XDN() 0 +#define CHIP_HAS_REV1_XDN() 1 /** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */ -#define CHIP_HAS_CMPEXCH() 0 +#define CHIP_HAS_CMPEXCH() 1 /** Does the chip have memory-mapped I/O support? */ -#define CHIP_HAS_MMIO() 0 +#define CHIP_HAS_MMIO() 1 /** Does the chip have post-completion interrupts? */ -#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0 +#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 1 /** Does the chip have native single step support? 
*/ -#define CHIP_HAS_SINGLE_STEP() 0 +#define CHIP_HAS_SINGLE_STEP() 1 #ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */ /** How many entries are present in the instruction TLB? */ -#define CHIP_ITLB_ENTRIES() 8 +#define CHIP_ITLB_ENTRIES() 16 /** How many entries are present in the data TLB? */ -#define CHIP_DTLB_ENTRIES() 16 +#define CHIP_DTLB_ENTRIES() 32 /** How many MAF entries does the XAUI shim have? */ -#define CHIP_XAUI_MAF_ENTRIES() 16 +#define CHIP_XAUI_MAF_ENTRIES() 32 /** Does the memory shim have a source-id table? */ -#define CHIP_HAS_MSHIM_SRCID_TABLE() 1 +#define CHIP_HAS_MSHIM_SRCID_TABLE() 0 /** Does the L1 instruction cache clear on reset? */ -#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0 +#define CHIP_HAS_L1I_CLEAR_ON_RESET() 1 /** Does the chip come out of reset with valid coordinates on all tiles? * Note that if defined, this also implies that the upper left is 1,1. */ -#define CHIP_HAS_VALID_TILE_COORD_RESET() 0 +#define CHIP_HAS_VALID_TILE_COORD_RESET() 1 /** Does the chip have unified packet formats? */ -#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0 +#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 1 /** Does the chip support write reordering? */ -#define CHIP_HAS_WRITE_REORDERING() 0 +#define CHIP_HAS_WRITE_REORDERING() 1 /** Does the chip support Y-X routing as well as X-Y? */ -#define CHIP_HAS_Y_X_ROUTING() 0 +#define CHIP_HAS_Y_X_ROUTING() 1 /** Is INTCTRL_3 managed with the correct MPL? */ -#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0 +#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 1 /** Is it possible to configure the chip to be big-endian? */ -#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0 +#define CHIP_HAS_BIG_ENDIAN_CONFIG() 1 /** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */ #define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0 @@ -240,19 +240,19 @@ #define CHIP_HAS_DIAG_TRACE_WAY() 0 /** Is the MEM_STRIPE_CONFIG SPR supported? 
*/ -#define CHIP_HAS_MEM_STRIPE_CONFIG() 0 +#define CHIP_HAS_MEM_STRIPE_CONFIG() 1 /** Are the TLB_PERF SPRs supported? */ -#define CHIP_HAS_TLB_PERF() 0 +#define CHIP_HAS_TLB_PERF() 1 /** Is the VDN_SNOOP_SHIM_CTL SPR supported? */ #define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0 /** Does the chip support rev1 DMA packets? */ -#define CHIP_HAS_REV1_DMA_PACKETS() 0 +#define CHIP_HAS_REV1_DMA_PACKETS() 1 /** Does the chip have an IPI shim? */ -#define CHIP_HAS_IPI() 0 +#define CHIP_HAS_IPI() 1 #endif /* !__OPEN_SOURCE__ */ #endif /* __ARCH_CHIP_H__ */ diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/uapi/arch/chip_tilepro.h index 70017699a74..70017699a74 100644 --- a/arch/tile/include/arch/chip_tilepro.h +++ b/arch/tile/include/uapi/arch/chip_tilepro.h diff --git a/arch/tile/include/arch/icache.h b/arch/tile/include/uapi/arch/icache.h index 5c87c901633..762eafa8a11 100644 --- a/arch/tile/include/arch/icache.h +++ b/arch/tile/include/uapi/arch/icache.h @@ -16,7 +16,7 @@ /** * @file * - * Support for invalidating bytes in the instruction + * Support for invalidating bytes in the instruction cache. */ #ifndef __ARCH_ICACHE_H__ @@ -30,11 +30,10 @@ * * @param addr The start of memory to be invalidated. * @param size The number of bytes to be invalidated. - * @param page_size The system's page size, typically the PAGE_SIZE constant - * in sys/page.h. This value must be a power of two no larger - * than the page containing the code to be invalidated. If the value - * is smaller than the actual page size, this function will still - * work, but may run slower than necessary. + * @param page_size The system's page size, e.g. getpagesize() in userspace. + * This value must be a power of two no larger than the page containing + * the code to be invalidated. If the value is smaller than the actual page + * size, this function will still work, but may run slower than necessary. 
*/ static __inline void invalidate_icache(const void* addr, unsigned long size, diff --git a/arch/tile/include/arch/interrupts.h b/arch/tile/include/uapi/arch/interrupts.h index 20f8f07d2de..20f8f07d2de 100644 --- a/arch/tile/include/arch/interrupts.h +++ b/arch/tile/include/uapi/arch/interrupts.h diff --git a/arch/tile/include/uapi/arch/interrupts_32.h b/arch/tile/include/uapi/arch/interrupts_32.h new file mode 100644 index 00000000000..2efe3f68b2d --- /dev/null +++ b/arch/tile/include/uapi/arch/interrupts_32.h @@ -0,0 +1,309 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +#ifndef __KERNEL__ +/** Mask for an interrupt. */ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK_LO(intno) (1 << (intno)) +#define INT_MASK_HI(intno) (1 << ((intno) - 32)) + +#ifndef __ASSEMBLER__ +#define INT_MASK(intno) (1ULL << (intno)) +#endif +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. 
*/ +#define INT_BREAKPOINT (63) + +#define INT_ITLB_MISS 0 +#define INT_MEM_ERROR 1 +#define INT_ILL 2 +#define INT_GPV 3 +#define INT_SN_ACCESS 4 +#define INT_IDN_ACCESS 5 +#define INT_UDN_ACCESS 6 +#define INT_IDN_REFILL 7 +#define INT_UDN_REFILL 8 +#define INT_IDN_COMPLETE 9 +#define INT_UDN_COMPLETE 10 +#define INT_SWINT_3 11 +#define INT_SWINT_2 12 +#define INT_SWINT_1 13 +#define INT_SWINT_0 14 +#define INT_UNALIGN_DATA 15 +#define INT_DTLB_MISS 16 +#define INT_DTLB_ACCESS 17 +#define INT_DMATLB_MISS 18 +#define INT_DMATLB_ACCESS 19 +#define INT_SNITLB_MISS 20 +#define INT_SN_NOTIFY 21 +#define INT_SN_FIREWALL 22 +#define INT_IDN_FIREWALL 23 +#define INT_UDN_FIREWALL 24 +#define INT_TILE_TIMER 25 +#define INT_IDN_TIMER 26 +#define INT_UDN_TIMER 27 +#define INT_DMA_NOTIFY 28 +#define INT_IDN_CA 29 +#define INT_UDN_CA 30 +#define INT_IDN_AVAIL 31 +#define INT_UDN_AVAIL 32 +#define INT_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DMA_ASID 42 +#define INT_SNI_ASID 43 +#define INT_DMA_CPL 44 +#define INT_SN_CPL 45 +#define INT_DOUBLE_FAULT 46 +#define INT_SN_STATIC_ACCESS 47 +#define INT_AUX_PERF_COUNT 48 + +#define NUM_INTERRUPTS 49 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << 
INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + 
(1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define MASKABLE_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ 
+ (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define SYNC_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ + 0) +#define NON_SYNC_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/uapi/arch/interrupts_64.h b/arch/tile/include/uapi/arch/interrupts_64.h new file mode 100644 index 00000000000..13c9f918234 --- /dev/null +++ b/arch/tile/include/uapi/arch/interrupts_64.h @@ -0,0 +1,278 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef __ARCH_INTERRUPTS_H__ +#define __ARCH_INTERRUPTS_H__ + +#ifndef __KERNEL__ +/** Mask for an interrupt. */ +#ifdef __ASSEMBLER__ +/* Note: must handle breaking interrupts into high and low words manually. */ +#define INT_MASK(intno) (1 << (intno)) +#else +#define INT_MASK(intno) (1ULL << (intno)) +#endif +#endif + + +/** Where a given interrupt executes */ +#define INTERRUPT_VECTOR(i, pl) (0xFC000000 + ((pl) << 24) + ((i) << 8)) + +/** Where to store a vector for a given interrupt. */ +#define USER_INTERRUPT_VECTOR(i) INTERRUPT_VECTOR(i, 0) + +/** The base address of user-level interrupts. */ +#define USER_INTERRUPT_VECTOR_BASE INTERRUPT_VECTOR(0, 0) + + +/** Additional synthetic interrupt. 
*/ +#define INT_BREAKPOINT (63) + +#define INT_MEM_ERROR 0 +#define INT_SINGLE_STEP_3 1 +#define INT_SINGLE_STEP_2 2 +#define INT_SINGLE_STEP_1 3 +#define INT_SINGLE_STEP_0 4 +#define INT_IDN_COMPLETE 5 +#define INT_UDN_COMPLETE 6 +#define INT_ITLB_MISS 7 +#define INT_ILL 8 +#define INT_GPV 9 +#define INT_IDN_ACCESS 10 +#define INT_UDN_ACCESS 11 +#define INT_SWINT_3 12 +#define INT_SWINT_2 13 +#define INT_SWINT_1 14 +#define INT_SWINT_0 15 +#define INT_ILL_TRANS 16 +#define INT_UNALIGN_DATA 17 +#define INT_DTLB_MISS 18 +#define INT_DTLB_ACCESS 19 +#define INT_IDN_FIREWALL 20 +#define INT_UDN_FIREWALL 21 +#define INT_TILE_TIMER 22 +#define INT_AUX_TILE_TIMER 23 +#define INT_IDN_TIMER 24 +#define INT_UDN_TIMER 25 +#define INT_IDN_AVAIL 26 +#define INT_UDN_AVAIL 27 +#define INT_IPI_3 28 +#define INT_IPI_2 29 +#define INT_IPI_1 30 +#define INT_IPI_0 31 +#define INT_PERF_COUNT 32 +#define INT_AUX_PERF_COUNT 33 +#define INT_INTCTRL_3 34 +#define INT_INTCTRL_2 35 +#define INT_INTCTRL_1 36 +#define INT_INTCTRL_0 37 +#define INT_BOOT_ACCESS 38 +#define INT_WORLD_ACCESS 39 +#define INT_I_ASID 40 +#define INT_D_ASID 41 +#define INT_DOUBLE_FAULT 42 + +#define NUM_INTERRUPTS 43 + +#ifndef __ASSEMBLER__ +#define QUEUED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + 
(1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define NONQUEUED_INTERRUPTS ( \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + 0) +#define CRITICAL_MASKED_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + 0) +#define CRITICAL_UNMASKED_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define 
MASKABLE_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + 0) +#define UNMASKABLE_INTERRUPTS ( \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#define SYNC_INTERRUPTS ( \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + 0) +#define 
NON_SYNC_INTERRUPTS ( \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + 0) +#endif /* !__ASSEMBLER__ */ +#endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/uapi/arch/opcode.h b/arch/tile/include/uapi/arch/opcode.h new file mode 100644 index 00000000000..92d15229ece --- /dev/null +++ b/arch/tile/include/uapi/arch/opcode.h @@ -0,0 +1,21 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#if defined(__tilepro__) +#include <arch/opcode_tilepro.h> +#elif defined(__tilegx__) +#include <arch/opcode_tilegx.h> +#else +#error Unexpected Tilera chip type +#endif diff --git a/arch/tile/include/uapi/arch/opcode_tilegx.h b/arch/tile/include/uapi/arch/opcode_tilegx.h new file mode 100644 index 00000000000..d76ff2db745 --- /dev/null +++ b/arch/tile/include/uapi/arch/opcode_tilegx.h @@ -0,0 +1,1406 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +#ifndef __ARCH_OPCODE_H__ +#define __ARCH_OPCODE_H__ + +#ifndef __ASSEMBLER__ + +typedef unsigned long long tilegx_bundle_bits; + +/* These are the bits that determine if a bundle is in the X encoding. */ +#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62) + +enum +{ + /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ + TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3, + + /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ + TILEGX_NUM_PIPELINE_ENCODINGS = 5, + + /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3, + + /* Instructions take this many bytes. */ + TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES, + + /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, + + /* Bundles should be aligned modulo this number of bytes. 
*/ + TILEGX_BUNDLE_ALIGNMENT_IN_BYTES = + (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), + + /* Number of registers (some are magic, such as network I/O). */ + TILEGX_NUM_REGISTERS = 64, +}; + +/* Make a few "tile_" variables to simplify common code between + architectures. */ + +typedef tilegx_bundle_bits tile_bundle_bits; +#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_BPT_BUNDLE TILEGX_BPT_BUNDLE + +/* 64-bit pattern for a { bpt ; nop } bundle. */ +#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL + +static __inline unsigned int +get_BFEnd_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_BFOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 24)) & 0xf); +} + +static __inline unsigned int +get_BFStart_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3f); +} + +static __inline unsigned int +get_BrOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x0001ffc0); +} + +static __inline unsigned int +get_BrType_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 54)) & 0x1f); +} + +static __inline unsigned int +get_Dest_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 43)) & 0x000000c0); +} + +static __inline unsigned int +get_Dest_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned 
int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Imm16_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xffff); +} + +static __inline unsigned int +get_Imm16_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xffff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0xff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_JumpOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x7ffffff); +} + +static __inline unsigned int +get_JumpOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0x1); +} + +static __inline unsigned int +get_MF_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3fff); +} + +static __inline unsigned int +get_MT_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x00003fc0); +} + +static __inline unsigned int +get_Mode(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 62)) & 0x3); +} + +static __inline unsigned int 
+get_Opcode_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 28)) & 0x7); +} + +static __inline unsigned int +get_Opcode_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0x7); +} + +static __inline unsigned int +get_Opcode_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y2(tilegx_bundle_bits n) +{ + return (((n >> 26)) & 0x00000001) | + (((unsigned int)(n >> 56)) & 0x00000002); +} + +static __inline unsigned int +get_RRROpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_ShAmt_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int 
+get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_SrcA_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y2(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x3f); +} + +static __inline unsigned int +get_SrcBDest_Y2(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n) +{ + 
return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + + +static __inline int +sign_extend(int n, int num_bits) +{ + int shift = (int)(sizeof(int) * 8 - num_bits); + return (n << shift) >> shift; +} + + + +static __inline tilegx_bundle_bits +create_BFEnd_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_BFOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 24); +} + +static __inline tilegx_bundle_bits +create_BFStart_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 18); +} + +static __inline tilegx_bundle_bits +create_BrOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x0001ffc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_BrType_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1f)) << 54); +} + +static __inline tilegx_bundle_bits +create_Dest_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x000000c0)) << 43); +} + +static __inline tilegx_bundle_bits +create_Dest_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilegx_bundle_bits +create_Dest_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Dest_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + 
+static __inline tilegx_bundle_bits +create_Dest_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Imm16_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xffff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm16_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xffff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 20); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 51); +} + +static __inline tilegx_bundle_bits +create_Imm8_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_JumpOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7ffffff)) << 31); +} + +static __inline tilegx_bundle_bits +create_JumpOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1)) << 58); +} + +static __inline tilegx_bundle_bits +create_MF_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3fff)) << 37); +} + +static __inline tilegx_bundle_bits 
+create_MT_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x00003fc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_Mode(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 62); +} + +static __inline tilegx_bundle_bits +create_Opcode_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 28); +} + +static __inline tilegx_bundle_bits +create_Opcode_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7)) << 59); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 27); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xf)) << 58); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x00000001) << 26) | + (((tilegx_bundle_bits)(n & 0x00000002)) << 56); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShAmt_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline 
tilegx_bundle_bits +create_ShAmt_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_SrcA_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 20); +} + +static __inline tilegx_bundle_bits +create_SrcBDest_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + 
return (((tilegx_bundle_bits)(n & 0x3f)) << 51); +} + +static __inline tilegx_bundle_bits +create_SrcB_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + + +enum +{ + ADDI_IMM8_OPCODE_X0 = 1, + ADDI_IMM8_OPCODE_X1 = 1, + ADDI_OPCODE_Y0 = 0, + ADDI_OPCODE_Y1 = 1, + ADDLI_OPCODE_X0 = 1, + ADDLI_OPCODE_X1 = 0, + ADDXI_IMM8_OPCODE_X0 = 2, + ADDXI_IMM8_OPCODE_X1 = 2, + ADDXI_OPCODE_Y0 = 1, + ADDXI_OPCODE_Y1 = 2, + ADDXLI_OPCODE_X0 = 2, + ADDXLI_OPCODE_X1 = 1, + ADDXSC_RRR_0_OPCODE_X0 = 1, + ADDXSC_RRR_0_OPCODE_X1 = 1, + ADDX_RRR_0_OPCODE_X0 = 2, + ADDX_RRR_0_OPCODE_X1 = 2, + ADDX_RRR_0_OPCODE_Y0 = 0, + ADDX_SPECIAL_0_OPCODE_Y1 = 0, + ADD_RRR_0_OPCODE_X0 = 3, + ADD_RRR_0_OPCODE_X1 = 3, + ADD_RRR_0_OPCODE_Y0 = 1, + ADD_SPECIAL_0_OPCODE_Y1 = 1, + ANDI_IMM8_OPCODE_X0 = 3, + ANDI_IMM8_OPCODE_X1 = 3, + 
ANDI_OPCODE_Y0 = 2, + ANDI_OPCODE_Y1 = 3, + AND_RRR_0_OPCODE_X0 = 4, + AND_RRR_0_OPCODE_X1 = 4, + AND_RRR_5_OPCODE_Y0 = 0, + AND_RRR_5_OPCODE_Y1 = 0, + BEQZT_BRANCH_OPCODE_X1 = 16, + BEQZ_BRANCH_OPCODE_X1 = 17, + BFEXTS_BF_OPCODE_X0 = 4, + BFEXTU_BF_OPCODE_X0 = 5, + BFINS_BF_OPCODE_X0 = 6, + BF_OPCODE_X0 = 3, + BGEZT_BRANCH_OPCODE_X1 = 18, + BGEZ_BRANCH_OPCODE_X1 = 19, + BGTZT_BRANCH_OPCODE_X1 = 20, + BGTZ_BRANCH_OPCODE_X1 = 21, + BLBCT_BRANCH_OPCODE_X1 = 22, + BLBC_BRANCH_OPCODE_X1 = 23, + BLBST_BRANCH_OPCODE_X1 = 24, + BLBS_BRANCH_OPCODE_X1 = 25, + BLEZT_BRANCH_OPCODE_X1 = 26, + BLEZ_BRANCH_OPCODE_X1 = 27, + BLTZT_BRANCH_OPCODE_X1 = 28, + BLTZ_BRANCH_OPCODE_X1 = 29, + BNEZT_BRANCH_OPCODE_X1 = 30, + BNEZ_BRANCH_OPCODE_X1 = 31, + BRANCH_OPCODE_X1 = 2, + CMOVEQZ_RRR_0_OPCODE_X0 = 5, + CMOVEQZ_RRR_4_OPCODE_Y0 = 0, + CMOVNEZ_RRR_0_OPCODE_X0 = 6, + CMOVNEZ_RRR_4_OPCODE_Y0 = 1, + CMPEQI_IMM8_OPCODE_X0 = 4, + CMPEQI_IMM8_OPCODE_X1 = 4, + CMPEQI_OPCODE_Y0 = 3, + CMPEQI_OPCODE_Y1 = 4, + CMPEQ_RRR_0_OPCODE_X0 = 7, + CMPEQ_RRR_0_OPCODE_X1 = 5, + CMPEQ_RRR_3_OPCODE_Y0 = 0, + CMPEQ_RRR_3_OPCODE_Y1 = 2, + CMPEXCH4_RRR_0_OPCODE_X1 = 6, + CMPEXCH_RRR_0_OPCODE_X1 = 7, + CMPLES_RRR_0_OPCODE_X0 = 8, + CMPLES_RRR_0_OPCODE_X1 = 8, + CMPLES_RRR_2_OPCODE_Y0 = 0, + CMPLES_RRR_2_OPCODE_Y1 = 0, + CMPLEU_RRR_0_OPCODE_X0 = 9, + CMPLEU_RRR_0_OPCODE_X1 = 9, + CMPLEU_RRR_2_OPCODE_Y0 = 1, + CMPLEU_RRR_2_OPCODE_Y1 = 1, + CMPLTSI_IMM8_OPCODE_X0 = 5, + CMPLTSI_IMM8_OPCODE_X1 = 5, + CMPLTSI_OPCODE_Y0 = 4, + CMPLTSI_OPCODE_Y1 = 5, + CMPLTS_RRR_0_OPCODE_X0 = 10, + CMPLTS_RRR_0_OPCODE_X1 = 10, + CMPLTS_RRR_2_OPCODE_Y0 = 2, + CMPLTS_RRR_2_OPCODE_Y1 = 2, + CMPLTUI_IMM8_OPCODE_X0 = 6, + CMPLTUI_IMM8_OPCODE_X1 = 6, + CMPLTU_RRR_0_OPCODE_X0 = 11, + CMPLTU_RRR_0_OPCODE_X1 = 11, + CMPLTU_RRR_2_OPCODE_Y0 = 3, + CMPLTU_RRR_2_OPCODE_Y1 = 3, + CMPNE_RRR_0_OPCODE_X0 = 12, + CMPNE_RRR_0_OPCODE_X1 = 12, + CMPNE_RRR_3_OPCODE_Y0 = 1, + CMPNE_RRR_3_OPCODE_Y1 = 3, + CMULAF_RRR_0_OPCODE_X0 = 13, + CMULA_RRR_0_OPCODE_X0 = 
14, + CMULFR_RRR_0_OPCODE_X0 = 15, + CMULF_RRR_0_OPCODE_X0 = 16, + CMULHR_RRR_0_OPCODE_X0 = 17, + CMULH_RRR_0_OPCODE_X0 = 18, + CMUL_RRR_0_OPCODE_X0 = 19, + CNTLZ_UNARY_OPCODE_X0 = 1, + CNTLZ_UNARY_OPCODE_Y0 = 1, + CNTTZ_UNARY_OPCODE_X0 = 2, + CNTTZ_UNARY_OPCODE_Y0 = 2, + CRC32_32_RRR_0_OPCODE_X0 = 20, + CRC32_8_RRR_0_OPCODE_X0 = 21, + DBLALIGN2_RRR_0_OPCODE_X0 = 22, + DBLALIGN2_RRR_0_OPCODE_X1 = 13, + DBLALIGN4_RRR_0_OPCODE_X0 = 23, + DBLALIGN4_RRR_0_OPCODE_X1 = 14, + DBLALIGN6_RRR_0_OPCODE_X0 = 24, + DBLALIGN6_RRR_0_OPCODE_X1 = 15, + DBLALIGN_RRR_0_OPCODE_X0 = 25, + DRAIN_UNARY_OPCODE_X1 = 1, + DTLBPR_UNARY_OPCODE_X1 = 2, + EXCH4_RRR_0_OPCODE_X1 = 16, + EXCH_RRR_0_OPCODE_X1 = 17, + FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26, + FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27, + FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28, + FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29, + FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30, + FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31, + FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32, + FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33, + FETCHADD4_RRR_0_OPCODE_X1 = 18, + FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19, + FETCHADDGEZ_RRR_0_OPCODE_X1 = 20, + FETCHADD_RRR_0_OPCODE_X1 = 21, + FETCHAND4_RRR_0_OPCODE_X1 = 22, + FETCHAND_RRR_0_OPCODE_X1 = 23, + FETCHOR4_RRR_0_OPCODE_X1 = 24, + FETCHOR_RRR_0_OPCODE_X1 = 25, + FINV_UNARY_OPCODE_X1 = 3, + FLUSHWB_UNARY_OPCODE_X1 = 4, + FLUSH_UNARY_OPCODE_X1 = 5, + FNOP_UNARY_OPCODE_X0 = 3, + FNOP_UNARY_OPCODE_X1 = 6, + FNOP_UNARY_OPCODE_Y0 = 3, + FNOP_UNARY_OPCODE_Y1 = 8, + FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34, + FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35, + FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36, + FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37, + FSINGLE_PACK1_UNARY_OPCODE_X0 = 4, + FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4, + FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38, + FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39, + ICOH_UNARY_OPCODE_X1 = 7, + ILL_UNARY_OPCODE_X1 = 8, + ILL_UNARY_OPCODE_Y1 = 9, + IMM8_OPCODE_X0 = 4, + IMM8_OPCODE_X1 = 3, + INV_UNARY_OPCODE_X1 = 9, + IRET_UNARY_OPCODE_X1 = 10, + JALRP_UNARY_OPCODE_X1 = 
11, + JALRP_UNARY_OPCODE_Y1 = 10, + JALR_UNARY_OPCODE_X1 = 12, + JALR_UNARY_OPCODE_Y1 = 11, + JAL_JUMP_OPCODE_X1 = 0, + JRP_UNARY_OPCODE_X1 = 13, + JRP_UNARY_OPCODE_Y1 = 12, + JR_UNARY_OPCODE_X1 = 14, + JR_UNARY_OPCODE_Y1 = 13, + JUMP_OPCODE_X1 = 4, + J_JUMP_OPCODE_X1 = 1, + LD1S_ADD_IMM8_OPCODE_X1 = 7, + LD1S_OPCODE_Y2 = 0, + LD1S_UNARY_OPCODE_X1 = 15, + LD1U_ADD_IMM8_OPCODE_X1 = 8, + LD1U_OPCODE_Y2 = 1, + LD1U_UNARY_OPCODE_X1 = 16, + LD2S_ADD_IMM8_OPCODE_X1 = 9, + LD2S_OPCODE_Y2 = 2, + LD2S_UNARY_OPCODE_X1 = 17, + LD2U_ADD_IMM8_OPCODE_X1 = 10, + LD2U_OPCODE_Y2 = 3, + LD2U_UNARY_OPCODE_X1 = 18, + LD4S_ADD_IMM8_OPCODE_X1 = 11, + LD4S_OPCODE_Y2 = 1, + LD4S_UNARY_OPCODE_X1 = 19, + LD4U_ADD_IMM8_OPCODE_X1 = 12, + LD4U_OPCODE_Y2 = 2, + LD4U_UNARY_OPCODE_X1 = 20, + LDNA_UNARY_OPCODE_X1 = 21, + LDNT1S_ADD_IMM8_OPCODE_X1 = 13, + LDNT1S_UNARY_OPCODE_X1 = 22, + LDNT1U_ADD_IMM8_OPCODE_X1 = 14, + LDNT1U_UNARY_OPCODE_X1 = 23, + LDNT2S_ADD_IMM8_OPCODE_X1 = 15, + LDNT2S_UNARY_OPCODE_X1 = 24, + LDNT2U_ADD_IMM8_OPCODE_X1 = 16, + LDNT2U_UNARY_OPCODE_X1 = 25, + LDNT4S_ADD_IMM8_OPCODE_X1 = 17, + LDNT4S_UNARY_OPCODE_X1 = 26, + LDNT4U_ADD_IMM8_OPCODE_X1 = 18, + LDNT4U_UNARY_OPCODE_X1 = 27, + LDNT_ADD_IMM8_OPCODE_X1 = 19, + LDNT_UNARY_OPCODE_X1 = 28, + LD_ADD_IMM8_OPCODE_X1 = 20, + LD_OPCODE_Y2 = 3, + LD_UNARY_OPCODE_X1 = 29, + LNK_UNARY_OPCODE_X1 = 30, + LNK_UNARY_OPCODE_Y1 = 14, + LWNA_ADD_IMM8_OPCODE_X1 = 21, + MFSPR_IMM8_OPCODE_X1 = 22, + MF_UNARY_OPCODE_X1 = 31, + MM_BF_OPCODE_X0 = 7, + MNZ_RRR_0_OPCODE_X0 = 40, + MNZ_RRR_0_OPCODE_X1 = 26, + MNZ_RRR_4_OPCODE_Y0 = 2, + MNZ_RRR_4_OPCODE_Y1 = 2, + MODE_OPCODE_YA2 = 1, + MODE_OPCODE_YB2 = 2, + MODE_OPCODE_YC2 = 3, + MTSPR_IMM8_OPCODE_X1 = 23, + MULAX_RRR_0_OPCODE_X0 = 41, + MULAX_RRR_3_OPCODE_Y0 = 2, + MULA_HS_HS_RRR_0_OPCODE_X0 = 42, + MULA_HS_HS_RRR_9_OPCODE_Y0 = 0, + MULA_HS_HU_RRR_0_OPCODE_X0 = 43, + MULA_HS_LS_RRR_0_OPCODE_X0 = 44, + MULA_HS_LU_RRR_0_OPCODE_X0 = 45, + MULA_HU_HU_RRR_0_OPCODE_X0 = 46, + MULA_HU_HU_RRR_9_OPCODE_Y0 = 
1, + MULA_HU_LS_RRR_0_OPCODE_X0 = 47, + MULA_HU_LU_RRR_0_OPCODE_X0 = 48, + MULA_LS_LS_RRR_0_OPCODE_X0 = 49, + MULA_LS_LS_RRR_9_OPCODE_Y0 = 2, + MULA_LS_LU_RRR_0_OPCODE_X0 = 50, + MULA_LU_LU_RRR_0_OPCODE_X0 = 51, + MULA_LU_LU_RRR_9_OPCODE_Y0 = 3, + MULX_RRR_0_OPCODE_X0 = 52, + MULX_RRR_3_OPCODE_Y0 = 3, + MUL_HS_HS_RRR_0_OPCODE_X0 = 53, + MUL_HS_HS_RRR_8_OPCODE_Y0 = 0, + MUL_HS_HU_RRR_0_OPCODE_X0 = 54, + MUL_HS_LS_RRR_0_OPCODE_X0 = 55, + MUL_HS_LU_RRR_0_OPCODE_X0 = 56, + MUL_HU_HU_RRR_0_OPCODE_X0 = 57, + MUL_HU_HU_RRR_8_OPCODE_Y0 = 1, + MUL_HU_LS_RRR_0_OPCODE_X0 = 58, + MUL_HU_LU_RRR_0_OPCODE_X0 = 59, + MUL_LS_LS_RRR_0_OPCODE_X0 = 60, + MUL_LS_LS_RRR_8_OPCODE_Y0 = 2, + MUL_LS_LU_RRR_0_OPCODE_X0 = 61, + MUL_LU_LU_RRR_0_OPCODE_X0 = 62, + MUL_LU_LU_RRR_8_OPCODE_Y0 = 3, + MZ_RRR_0_OPCODE_X0 = 63, + MZ_RRR_0_OPCODE_X1 = 27, + MZ_RRR_4_OPCODE_Y0 = 3, + MZ_RRR_4_OPCODE_Y1 = 3, + NAP_UNARY_OPCODE_X1 = 32, + NOP_UNARY_OPCODE_X0 = 5, + NOP_UNARY_OPCODE_X1 = 33, + NOP_UNARY_OPCODE_Y0 = 5, + NOP_UNARY_OPCODE_Y1 = 15, + NOR_RRR_0_OPCODE_X0 = 64, + NOR_RRR_0_OPCODE_X1 = 28, + NOR_RRR_5_OPCODE_Y0 = 1, + NOR_RRR_5_OPCODE_Y1 = 1, + ORI_IMM8_OPCODE_X0 = 7, + ORI_IMM8_OPCODE_X1 = 24, + OR_RRR_0_OPCODE_X0 = 65, + OR_RRR_0_OPCODE_X1 = 29, + OR_RRR_5_OPCODE_Y0 = 2, + OR_RRR_5_OPCODE_Y1 = 2, + PCNT_UNARY_OPCODE_X0 = 6, + PCNT_UNARY_OPCODE_Y0 = 6, + REVBITS_UNARY_OPCODE_X0 = 7, + REVBITS_UNARY_OPCODE_Y0 = 7, + REVBYTES_UNARY_OPCODE_X0 = 8, + REVBYTES_UNARY_OPCODE_Y0 = 8, + ROTLI_SHIFT_OPCODE_X0 = 1, + ROTLI_SHIFT_OPCODE_X1 = 1, + ROTLI_SHIFT_OPCODE_Y0 = 0, + ROTLI_SHIFT_OPCODE_Y1 = 0, + ROTL_RRR_0_OPCODE_X0 = 66, + ROTL_RRR_0_OPCODE_X1 = 30, + ROTL_RRR_6_OPCODE_Y0 = 0, + ROTL_RRR_6_OPCODE_Y1 = 0, + RRR_0_OPCODE_X0 = 5, + RRR_0_OPCODE_X1 = 5, + RRR_0_OPCODE_Y0 = 5, + RRR_0_OPCODE_Y1 = 6, + RRR_1_OPCODE_Y0 = 6, + RRR_1_OPCODE_Y1 = 7, + RRR_2_OPCODE_Y0 = 7, + RRR_2_OPCODE_Y1 = 8, + RRR_3_OPCODE_Y0 = 8, + RRR_3_OPCODE_Y1 = 9, + RRR_4_OPCODE_Y0 = 9, + RRR_4_OPCODE_Y1 = 10, + RRR_5_OPCODE_Y0 = 
10, + RRR_5_OPCODE_Y1 = 11, + RRR_6_OPCODE_Y0 = 11, + RRR_6_OPCODE_Y1 = 12, + RRR_7_OPCODE_Y0 = 12, + RRR_7_OPCODE_Y1 = 13, + RRR_8_OPCODE_Y0 = 13, + RRR_9_OPCODE_Y0 = 14, + SHIFT_OPCODE_X0 = 6, + SHIFT_OPCODE_X1 = 6, + SHIFT_OPCODE_Y0 = 15, + SHIFT_OPCODE_Y1 = 14, + SHL16INSLI_OPCODE_X0 = 7, + SHL16INSLI_OPCODE_X1 = 7, + SHL1ADDX_RRR_0_OPCODE_X0 = 67, + SHL1ADDX_RRR_0_OPCODE_X1 = 31, + SHL1ADDX_RRR_7_OPCODE_Y0 = 1, + SHL1ADDX_RRR_7_OPCODE_Y1 = 1, + SHL1ADD_RRR_0_OPCODE_X0 = 68, + SHL1ADD_RRR_0_OPCODE_X1 = 32, + SHL1ADD_RRR_1_OPCODE_Y0 = 0, + SHL1ADD_RRR_1_OPCODE_Y1 = 0, + SHL2ADDX_RRR_0_OPCODE_X0 = 69, + SHL2ADDX_RRR_0_OPCODE_X1 = 33, + SHL2ADDX_RRR_7_OPCODE_Y0 = 2, + SHL2ADDX_RRR_7_OPCODE_Y1 = 2, + SHL2ADD_RRR_0_OPCODE_X0 = 70, + SHL2ADD_RRR_0_OPCODE_X1 = 34, + SHL2ADD_RRR_1_OPCODE_Y0 = 1, + SHL2ADD_RRR_1_OPCODE_Y1 = 1, + SHL3ADDX_RRR_0_OPCODE_X0 = 71, + SHL3ADDX_RRR_0_OPCODE_X1 = 35, + SHL3ADDX_RRR_7_OPCODE_Y0 = 3, + SHL3ADDX_RRR_7_OPCODE_Y1 = 3, + SHL3ADD_RRR_0_OPCODE_X0 = 72, + SHL3ADD_RRR_0_OPCODE_X1 = 36, + SHL3ADD_RRR_1_OPCODE_Y0 = 2, + SHL3ADD_RRR_1_OPCODE_Y1 = 2, + SHLI_SHIFT_OPCODE_X0 = 2, + SHLI_SHIFT_OPCODE_X1 = 2, + SHLI_SHIFT_OPCODE_Y0 = 1, + SHLI_SHIFT_OPCODE_Y1 = 1, + SHLXI_SHIFT_OPCODE_X0 = 3, + SHLXI_SHIFT_OPCODE_X1 = 3, + SHLX_RRR_0_OPCODE_X0 = 73, + SHLX_RRR_0_OPCODE_X1 = 37, + SHL_RRR_0_OPCODE_X0 = 74, + SHL_RRR_0_OPCODE_X1 = 38, + SHL_RRR_6_OPCODE_Y0 = 1, + SHL_RRR_6_OPCODE_Y1 = 1, + SHRSI_SHIFT_OPCODE_X0 = 4, + SHRSI_SHIFT_OPCODE_X1 = 4, + SHRSI_SHIFT_OPCODE_Y0 = 2, + SHRSI_SHIFT_OPCODE_Y1 = 2, + SHRS_RRR_0_OPCODE_X0 = 75, + SHRS_RRR_0_OPCODE_X1 = 39, + SHRS_RRR_6_OPCODE_Y0 = 2, + SHRS_RRR_6_OPCODE_Y1 = 2, + SHRUI_SHIFT_OPCODE_X0 = 5, + SHRUI_SHIFT_OPCODE_X1 = 5, + SHRUI_SHIFT_OPCODE_Y0 = 3, + SHRUI_SHIFT_OPCODE_Y1 = 3, + SHRUXI_SHIFT_OPCODE_X0 = 6, + SHRUXI_SHIFT_OPCODE_X1 = 6, + SHRUX_RRR_0_OPCODE_X0 = 76, + SHRUX_RRR_0_OPCODE_X1 = 40, + SHRU_RRR_0_OPCODE_X0 = 77, + SHRU_RRR_0_OPCODE_X1 = 41, + SHRU_RRR_6_OPCODE_Y0 = 3, + 
SHRU_RRR_6_OPCODE_Y1 = 3, + SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78, + ST1_ADD_IMM8_OPCODE_X1 = 25, + ST1_OPCODE_Y2 = 0, + ST1_RRR_0_OPCODE_X1 = 42, + ST2_ADD_IMM8_OPCODE_X1 = 26, + ST2_OPCODE_Y2 = 1, + ST2_RRR_0_OPCODE_X1 = 43, + ST4_ADD_IMM8_OPCODE_X1 = 27, + ST4_OPCODE_Y2 = 2, + ST4_RRR_0_OPCODE_X1 = 44, + STNT1_ADD_IMM8_OPCODE_X1 = 28, + STNT1_RRR_0_OPCODE_X1 = 45, + STNT2_ADD_IMM8_OPCODE_X1 = 29, + STNT2_RRR_0_OPCODE_X1 = 46, + STNT4_ADD_IMM8_OPCODE_X1 = 30, + STNT4_RRR_0_OPCODE_X1 = 47, + STNT_ADD_IMM8_OPCODE_X1 = 31, + STNT_RRR_0_OPCODE_X1 = 48, + ST_ADD_IMM8_OPCODE_X1 = 32, + ST_OPCODE_Y2 = 3, + ST_RRR_0_OPCODE_X1 = 49, + SUBXSC_RRR_0_OPCODE_X0 = 79, + SUBXSC_RRR_0_OPCODE_X1 = 50, + SUBX_RRR_0_OPCODE_X0 = 80, + SUBX_RRR_0_OPCODE_X1 = 51, + SUBX_RRR_0_OPCODE_Y0 = 2, + SUBX_RRR_0_OPCODE_Y1 = 2, + SUB_RRR_0_OPCODE_X0 = 81, + SUB_RRR_0_OPCODE_X1 = 52, + SUB_RRR_0_OPCODE_Y0 = 3, + SUB_RRR_0_OPCODE_Y1 = 3, + SWINT0_UNARY_OPCODE_X1 = 34, + SWINT1_UNARY_OPCODE_X1 = 35, + SWINT2_UNARY_OPCODE_X1 = 36, + SWINT3_UNARY_OPCODE_X1 = 37, + TBLIDXB0_UNARY_OPCODE_X0 = 9, + TBLIDXB0_UNARY_OPCODE_Y0 = 9, + TBLIDXB1_UNARY_OPCODE_X0 = 10, + TBLIDXB1_UNARY_OPCODE_Y0 = 10, + TBLIDXB2_UNARY_OPCODE_X0 = 11, + TBLIDXB2_UNARY_OPCODE_Y0 = 11, + TBLIDXB3_UNARY_OPCODE_X0 = 12, + TBLIDXB3_UNARY_OPCODE_Y0 = 12, + UNARY_RRR_0_OPCODE_X0 = 82, + UNARY_RRR_0_OPCODE_X1 = 53, + UNARY_RRR_1_OPCODE_Y0 = 3, + UNARY_RRR_1_OPCODE_Y1 = 3, + V1ADDI_IMM8_OPCODE_X0 = 8, + V1ADDI_IMM8_OPCODE_X1 = 33, + V1ADDUC_RRR_0_OPCODE_X0 = 83, + V1ADDUC_RRR_0_OPCODE_X1 = 54, + V1ADD_RRR_0_OPCODE_X0 = 84, + V1ADD_RRR_0_OPCODE_X1 = 55, + V1ADIFFU_RRR_0_OPCODE_X0 = 85, + V1AVGU_RRR_0_OPCODE_X0 = 86, + V1CMPEQI_IMM8_OPCODE_X0 = 9, + V1CMPEQI_IMM8_OPCODE_X1 = 34, + V1CMPEQ_RRR_0_OPCODE_X0 = 87, + V1CMPEQ_RRR_0_OPCODE_X1 = 56, + V1CMPLES_RRR_0_OPCODE_X0 = 88, + V1CMPLES_RRR_0_OPCODE_X1 = 57, + V1CMPLEU_RRR_0_OPCODE_X0 = 89, + V1CMPLEU_RRR_0_OPCODE_X1 = 58, + V1CMPLTSI_IMM8_OPCODE_X0 = 10, + V1CMPLTSI_IMM8_OPCODE_X1 = 35, + 
V1CMPLTS_RRR_0_OPCODE_X0 = 90, + V1CMPLTS_RRR_0_OPCODE_X1 = 59, + V1CMPLTUI_IMM8_OPCODE_X0 = 11, + V1CMPLTUI_IMM8_OPCODE_X1 = 36, + V1CMPLTU_RRR_0_OPCODE_X0 = 91, + V1CMPLTU_RRR_0_OPCODE_X1 = 60, + V1CMPNE_RRR_0_OPCODE_X0 = 92, + V1CMPNE_RRR_0_OPCODE_X1 = 61, + V1DDOTPUA_RRR_0_OPCODE_X0 = 161, + V1DDOTPUSA_RRR_0_OPCODE_X0 = 93, + V1DDOTPUS_RRR_0_OPCODE_X0 = 94, + V1DDOTPU_RRR_0_OPCODE_X0 = 162, + V1DOTPA_RRR_0_OPCODE_X0 = 95, + V1DOTPUA_RRR_0_OPCODE_X0 = 163, + V1DOTPUSA_RRR_0_OPCODE_X0 = 96, + V1DOTPUS_RRR_0_OPCODE_X0 = 97, + V1DOTPU_RRR_0_OPCODE_X0 = 164, + V1DOTP_RRR_0_OPCODE_X0 = 98, + V1INT_H_RRR_0_OPCODE_X0 = 99, + V1INT_H_RRR_0_OPCODE_X1 = 62, + V1INT_L_RRR_0_OPCODE_X0 = 100, + V1INT_L_RRR_0_OPCODE_X1 = 63, + V1MAXUI_IMM8_OPCODE_X0 = 12, + V1MAXUI_IMM8_OPCODE_X1 = 37, + V1MAXU_RRR_0_OPCODE_X0 = 101, + V1MAXU_RRR_0_OPCODE_X1 = 64, + V1MINUI_IMM8_OPCODE_X0 = 13, + V1MINUI_IMM8_OPCODE_X1 = 38, + V1MINU_RRR_0_OPCODE_X0 = 102, + V1MINU_RRR_0_OPCODE_X1 = 65, + V1MNZ_RRR_0_OPCODE_X0 = 103, + V1MNZ_RRR_0_OPCODE_X1 = 66, + V1MULTU_RRR_0_OPCODE_X0 = 104, + V1MULUS_RRR_0_OPCODE_X0 = 105, + V1MULU_RRR_0_OPCODE_X0 = 106, + V1MZ_RRR_0_OPCODE_X0 = 107, + V1MZ_RRR_0_OPCODE_X1 = 67, + V1SADAU_RRR_0_OPCODE_X0 = 108, + V1SADU_RRR_0_OPCODE_X0 = 109, + V1SHLI_SHIFT_OPCODE_X0 = 7, + V1SHLI_SHIFT_OPCODE_X1 = 7, + V1SHL_RRR_0_OPCODE_X0 = 110, + V1SHL_RRR_0_OPCODE_X1 = 68, + V1SHRSI_SHIFT_OPCODE_X0 = 8, + V1SHRSI_SHIFT_OPCODE_X1 = 8, + V1SHRS_RRR_0_OPCODE_X0 = 111, + V1SHRS_RRR_0_OPCODE_X1 = 69, + V1SHRUI_SHIFT_OPCODE_X0 = 9, + V1SHRUI_SHIFT_OPCODE_X1 = 9, + V1SHRU_RRR_0_OPCODE_X0 = 112, + V1SHRU_RRR_0_OPCODE_X1 = 70, + V1SUBUC_RRR_0_OPCODE_X0 = 113, + V1SUBUC_RRR_0_OPCODE_X1 = 71, + V1SUB_RRR_0_OPCODE_X0 = 114, + V1SUB_RRR_0_OPCODE_X1 = 72, + V2ADDI_IMM8_OPCODE_X0 = 14, + V2ADDI_IMM8_OPCODE_X1 = 39, + V2ADDSC_RRR_0_OPCODE_X0 = 115, + V2ADDSC_RRR_0_OPCODE_X1 = 73, + V2ADD_RRR_0_OPCODE_X0 = 116, + V2ADD_RRR_0_OPCODE_X1 = 74, + V2ADIFFS_RRR_0_OPCODE_X0 = 117, + V2AVGS_RRR_0_OPCODE_X0 
= 118, + V2CMPEQI_IMM8_OPCODE_X0 = 15, + V2CMPEQI_IMM8_OPCODE_X1 = 40, + V2CMPEQ_RRR_0_OPCODE_X0 = 119, + V2CMPEQ_RRR_0_OPCODE_X1 = 75, + V2CMPLES_RRR_0_OPCODE_X0 = 120, + V2CMPLES_RRR_0_OPCODE_X1 = 76, + V2CMPLEU_RRR_0_OPCODE_X0 = 121, + V2CMPLEU_RRR_0_OPCODE_X1 = 77, + V2CMPLTSI_IMM8_OPCODE_X0 = 16, + V2CMPLTSI_IMM8_OPCODE_X1 = 41, + V2CMPLTS_RRR_0_OPCODE_X0 = 122, + V2CMPLTS_RRR_0_OPCODE_X1 = 78, + V2CMPLTUI_IMM8_OPCODE_X0 = 17, + V2CMPLTUI_IMM8_OPCODE_X1 = 42, + V2CMPLTU_RRR_0_OPCODE_X0 = 123, + V2CMPLTU_RRR_0_OPCODE_X1 = 79, + V2CMPNE_RRR_0_OPCODE_X0 = 124, + V2CMPNE_RRR_0_OPCODE_X1 = 80, + V2DOTPA_RRR_0_OPCODE_X0 = 125, + V2DOTP_RRR_0_OPCODE_X0 = 126, + V2INT_H_RRR_0_OPCODE_X0 = 127, + V2INT_H_RRR_0_OPCODE_X1 = 81, + V2INT_L_RRR_0_OPCODE_X0 = 128, + V2INT_L_RRR_0_OPCODE_X1 = 82, + V2MAXSI_IMM8_OPCODE_X0 = 18, + V2MAXSI_IMM8_OPCODE_X1 = 43, + V2MAXS_RRR_0_OPCODE_X0 = 129, + V2MAXS_RRR_0_OPCODE_X1 = 83, + V2MINSI_IMM8_OPCODE_X0 = 19, + V2MINSI_IMM8_OPCODE_X1 = 44, + V2MINS_RRR_0_OPCODE_X0 = 130, + V2MINS_RRR_0_OPCODE_X1 = 84, + V2MNZ_RRR_0_OPCODE_X0 = 131, + V2MNZ_RRR_0_OPCODE_X1 = 85, + V2MULFSC_RRR_0_OPCODE_X0 = 132, + V2MULS_RRR_0_OPCODE_X0 = 133, + V2MULTS_RRR_0_OPCODE_X0 = 134, + V2MZ_RRR_0_OPCODE_X0 = 135, + V2MZ_RRR_0_OPCODE_X1 = 86, + V2PACKH_RRR_0_OPCODE_X0 = 136, + V2PACKH_RRR_0_OPCODE_X1 = 87, + V2PACKL_RRR_0_OPCODE_X0 = 137, + V2PACKL_RRR_0_OPCODE_X1 = 88, + V2PACKUC_RRR_0_OPCODE_X0 = 138, + V2PACKUC_RRR_0_OPCODE_X1 = 89, + V2SADAS_RRR_0_OPCODE_X0 = 139, + V2SADAU_RRR_0_OPCODE_X0 = 140, + V2SADS_RRR_0_OPCODE_X0 = 141, + V2SADU_RRR_0_OPCODE_X0 = 142, + V2SHLI_SHIFT_OPCODE_X0 = 10, + V2SHLI_SHIFT_OPCODE_X1 = 10, + V2SHLSC_RRR_0_OPCODE_X0 = 143, + V2SHLSC_RRR_0_OPCODE_X1 = 90, + V2SHL_RRR_0_OPCODE_X0 = 144, + V2SHL_RRR_0_OPCODE_X1 = 91, + V2SHRSI_SHIFT_OPCODE_X0 = 11, + V2SHRSI_SHIFT_OPCODE_X1 = 11, + V2SHRS_RRR_0_OPCODE_X0 = 145, + V2SHRS_RRR_0_OPCODE_X1 = 92, + V2SHRUI_SHIFT_OPCODE_X0 = 12, + V2SHRUI_SHIFT_OPCODE_X1 = 12, + V2SHRU_RRR_0_OPCODE_X0 = 
146, + V2SHRU_RRR_0_OPCODE_X1 = 93, + V2SUBSC_RRR_0_OPCODE_X0 = 147, + V2SUBSC_RRR_0_OPCODE_X1 = 94, + V2SUB_RRR_0_OPCODE_X0 = 148, + V2SUB_RRR_0_OPCODE_X1 = 95, + V4ADDSC_RRR_0_OPCODE_X0 = 149, + V4ADDSC_RRR_0_OPCODE_X1 = 96, + V4ADD_RRR_0_OPCODE_X0 = 150, + V4ADD_RRR_0_OPCODE_X1 = 97, + V4INT_H_RRR_0_OPCODE_X0 = 151, + V4INT_H_RRR_0_OPCODE_X1 = 98, + V4INT_L_RRR_0_OPCODE_X0 = 152, + V4INT_L_RRR_0_OPCODE_X1 = 99, + V4PACKSC_RRR_0_OPCODE_X0 = 153, + V4PACKSC_RRR_0_OPCODE_X1 = 100, + V4SHLSC_RRR_0_OPCODE_X0 = 154, + V4SHLSC_RRR_0_OPCODE_X1 = 101, + V4SHL_RRR_0_OPCODE_X0 = 155, + V4SHL_RRR_0_OPCODE_X1 = 102, + V4SHRS_RRR_0_OPCODE_X0 = 156, + V4SHRS_RRR_0_OPCODE_X1 = 103, + V4SHRU_RRR_0_OPCODE_X0 = 157, + V4SHRU_RRR_0_OPCODE_X1 = 104, + V4SUBSC_RRR_0_OPCODE_X0 = 158, + V4SUBSC_RRR_0_OPCODE_X1 = 105, + V4SUB_RRR_0_OPCODE_X0 = 159, + V4SUB_RRR_0_OPCODE_X1 = 106, + WH64_UNARY_OPCODE_X1 = 38, + XORI_IMM8_OPCODE_X0 = 20, + XORI_IMM8_OPCODE_X1 = 45, + XOR_RRR_0_OPCODE_X0 = 160, + XOR_RRR_0_OPCODE_X1 = 107, + XOR_RRR_5_OPCODE_Y0 = 3, + XOR_RRR_5_OPCODE_Y1 = 3 +}; + + +#endif /* __ASSEMBLER__ */ + +#endif /* __ARCH_OPCODE_H__ */ diff --git a/arch/tile/include/uapi/arch/opcode_tilepro.h b/arch/tile/include/uapi/arch/opcode_tilepro.h new file mode 100644 index 00000000000..4451cff1a86 --- /dev/null +++ b/arch/tile/include/uapi/arch/opcode_tilepro.h @@ -0,0 +1,1472 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ * + * + * + * + * + */ + +#ifndef __ARCH_OPCODE_H__ +#define __ARCH_OPCODE_H__ + +#ifndef __ASSEMBLER__ + +typedef unsigned long long tilepro_bundle_bits; + +/* This is the bit that determines if a bundle is in the Y encoding. */ +#define TILEPRO_BUNDLE_Y_ENCODING_MASK ((tilepro_bundle_bits)1 << 63) + +enum +{ + /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ + TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE = 3, + + /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ + TILEPRO_NUM_PIPELINE_ENCODINGS = 5, + + /* Log base 2 of TILEPRO_BUNDLE_SIZE_IN_BYTES. */ + TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES = 3, + + /* Instructions take this many bytes. */ + TILEPRO_BUNDLE_SIZE_IN_BYTES = 1 << TILEPRO_LOG2_BUNDLE_SIZE_IN_BYTES, + + /* Log base 2 of TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES. */ + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, + + /* Bundles should be aligned modulo this number of bytes. */ + TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES = + (1 << TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), + + /* Log base 2 of TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES. */ + TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES = 1, + + /* Static network instructions take this many bytes. */ + TILEPRO_SN_INSTRUCTION_SIZE_IN_BYTES = + (1 << TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES), + + /* Number of registers (some are magic, such as network I/O). */ + TILEPRO_NUM_REGISTERS = 64, + + /* Number of static network registers. */ + TILEPRO_NUM_SN_REGISTERS = 4 +}; + +/* Make a few "tile_" variables to simplify common code between + architectures. */ + +typedef tilepro_bundle_bits tile_bundle_bits; +#define TILE_BUNDLE_SIZE_IN_BYTES TILEPRO_BUNDLE_SIZE_IN_BYTES +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_BPT_BUNDLE TILEPRO_BPT_BUNDLE + +/* 64-bit pattern for a { bpt ; nop } bundle. 
*/ +#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL + +static __inline unsigned int +get_BrOff_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3ff); +} + +static __inline unsigned int +get_BrOff_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000); +} + +static __inline unsigned int +get_BrType_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0xf); +} + +static __inline unsigned int +get_Dest_Imm8_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 43)) & 0x000000c0); +} + +static __inline unsigned int +get_Dest_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 2)) & 0x3); +} + +static __inline unsigned int +get_Dest_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Imm16_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xffff); +} + +static __inline unsigned int +get_Imm16_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xffff); +} + +static __inline unsigned int +get_Imm8_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X1(tilepro_bundle_bits 
n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_ImmOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x7f); +} + +static __inline unsigned int +get_ImmOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0x7f); +} + +static __inline unsigned int +get_ImmRROpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 8)) & 0x3); +} + +static __inline unsigned int +get_JOffLong_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000) | + (((unsigned int)(n >> 14)) & 0x001e0000) | + (((unsigned int)(n >> 16)) & 0x07e00000) | + (((unsigned int)(n >> 31)) & 0x18000000); +} + +static __inline unsigned int +get_JOff_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x00007fff) | + (((unsigned int)(n >> 20)) & 0x00018000) | + (((unsigned int)(n >> 14)) & 0x001e0000) | + (((unsigned int)(n >> 16)) & 0x07e00000) | + (((unsigned int)(n >> 31)) & 0x08000000); +} + +static __inline unsigned int +get_MF_Imm15_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x00003fff) | + (((unsigned int)(n >> 44)) & 0x00004000); +} + +static __inline unsigned int +get_MMEnd_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x1f); +} + +static __inline unsigned int +get_MMEnd_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x1f); +} + +static __inline unsigned int +get_MMStart_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 23)) & 0x1f); 
+} + +static __inline unsigned int +get_MMStart_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 54)) & 0x1f); +} + +static __inline unsigned int +get_MT_Imm15_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x00003fc0) | + (((unsigned int)(n >> 44)) & 0x00004000); +} + +static __inline unsigned int +get_Mode(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 63)) & 0x1); +} + +static __inline unsigned int +get_NoRegOpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0xf); +} + +static __inline unsigned int +get_Opcode_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 10)) & 0x3f); +} + +static __inline unsigned int +get_Opcode_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 28)) & 0x7); +} + +static __inline unsigned int +get_Opcode_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y2(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 56)) & 0x7); +} + +static __inline unsigned int +get_RROpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 4)) & 0xf); +} + +static __inline unsigned int +get_RRROpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x1ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x1ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const 
unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_RouteOpcodeExtension_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3ff); +} + +static __inline unsigned int +get_S_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0x1); +} + +static __inline unsigned int +get_S_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0x1); +} + +static __inline unsigned int +get_ShAmt_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_ShAmt_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_SrcA_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y2(tilepro_bundle_bits n) +{ + return (((n >> 26)) & 0x00000001) | + (((unsigned int)(n >> 50)) & 0x0000003e); +} + +static __inline unsigned int +get_SrcBDest_Y2(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return 
(((n >> 20)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_Src_SN(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3); +} + +static __inline unsigned int +get_UnOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x1f); +} + +static __inline unsigned int +get_UnOpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x1f); +} + +static __inline unsigned int +get_UnShOpcodeExtension_X0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 17)) & 0x3ff); +} + +static __inline unsigned int +get_UnShOpcodeExtension_X1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 48)) & 0x3ff); +} + +static __inline unsigned int +get_UnShOpcodeExtension_Y0(tilepro_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 17)) & 0x7); +} + +static __inline unsigned int +get_UnShOpcodeExtension_Y1(tilepro_bundle_bits n) +{ + return (((unsigned int)(n >> 48)) & 0x7); +} + +/* Sign-extend the low num_bits bits of n (1 <= num_bits <= bit width of int). */ +static __inline int +sign_extend(int n, int num_bits) +{ + int shift = (int)(sizeof(int)
* 8 - num_bits); + return (int)((unsigned int)n << shift) >> shift; /* left shift done as unsigned: shifting a negative int left is undefined */ +} + + + +static __inline tilepro_bundle_bits +create_BrOff_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 0); +} + +static __inline tilepro_bundle_bits +create_BrOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20); +} + +static __inline tilepro_bundle_bits +create_BrType_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 31); +} + +static __inline tilepro_bundle_bits +create_Dest_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilepro_bundle_bits)(n & 0x000000c0)) << 43); +} + +static __inline tilepro_bundle_bits +create_Dest_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 2); +} + +static __inline tilepro_bundle_bits +create_Dest_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilepro_bundle_bits +create_Dest_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilepro_bundle_bits +create_Dest_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilepro_bundle_bits +create_Dest_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilepro_bundle_bits +create_Imm16_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xffff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm16_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xffff)) << 43); +} + +static __inline tilepro_bundle_bits +create_Imm8_SN(int num) +{ + const unsigned int n = (unsigned
int)num; + return ((n & 0xff) << 0); +} + +static __inline tilepro_bundle_bits +create_Imm8_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilepro_bundle_bits +create_Imm8_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilepro_bundle_bits +create_Imm8_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilepro_bundle_bits +create_ImmOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7f) << 20); +} + +static __inline tilepro_bundle_bits +create_ImmOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7f)) << 51); +} + +static __inline tilepro_bundle_bits +create_ImmRROpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 8); +} + +static __inline tilepro_bundle_bits +create_JOffLong_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20) | + (((tilepro_bundle_bits)(n & 0x001e0000)) << 14) | + (((tilepro_bundle_bits)(n & 0x07e00000)) << 16) | + (((tilepro_bundle_bits)(n & 0x18000000)) << 31); +} + +static __inline tilepro_bundle_bits +create_JOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00007fff)) << 43) | + (((tilepro_bundle_bits)(n & 0x00018000)) << 20) | + (((tilepro_bundle_bits)(n & 0x001e0000)) << 14) | + (((tilepro_bundle_bits)(n & 0x07e00000)) << 16) | + (((tilepro_bundle_bits)(n & 0x08000000)) << 31); +} + +static __inline tilepro_bundle_bits +create_MF_Imm15_X1(int num) +{ + const 
unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x00003fff)) << 37) | + (((tilepro_bundle_bits)(n & 0x00004000)) << 44); +} + +static __inline tilepro_bundle_bits +create_MMEnd_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 18); +} + +static __inline tilepro_bundle_bits +create_MMEnd_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 49); +} + +static __inline tilepro_bundle_bits +create_MMStart_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 23); +} + +static __inline tilepro_bundle_bits +create_MMStart_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 54); +} + +static __inline tilepro_bundle_bits +create_MT_Imm15_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilepro_bundle_bits)(n & 0x00003fc0)) << 37) | + (((tilepro_bundle_bits)(n & 0x00004000)) << 44); +} + +static __inline tilepro_bundle_bits +create_Mode(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1)) << 63); +} + +static __inline tilepro_bundle_bits +create_NoRegOpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 0); +} + +static __inline tilepro_bundle_bits +create_Opcode_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 10); +} + +static __inline tilepro_bundle_bits +create_Opcode_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 28); +} + +static __inline tilepro_bundle_bits +create_Opcode_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 59); +} + +static __inline tilepro_bundle_bits +create_Opcode_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 27); +} + +static __inline 
tilepro_bundle_bits +create_Opcode_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0xf)) << 59); +} + +static __inline tilepro_bundle_bits +create_Opcode_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7)) << 56); +} + +static __inline tilepro_bundle_bits +create_RROpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 4); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1ff) << 18); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1ff)) << 49); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilepro_bundle_bits +create_RRROpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilepro_bundle_bits +create_RouteOpcodeExtension_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 0); +} + +static __inline tilepro_bundle_bits +create_S_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1) << 27); +} + +static __inline tilepro_bundle_bits +create_S_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1)) << 58); +} + +static __inline tilepro_bundle_bits +create_ShAmt_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_ShAmt_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_ShAmt_Y0(int num) +{ + const unsigned 
int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_ShAmt_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_SrcA_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilepro_bundle_bits +create_SrcA_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilepro_bundle_bits +create_SrcA_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x00000001) << 26) | + (((tilepro_bundle_bits)(n & 0x0000003e)) << 50); +} + +static __inline tilepro_bundle_bits +create_SrcBDest_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 20); +} + +static __inline tilepro_bundle_bits +create_SrcB_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilepro_bundle_bits +create_SrcB_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilepro_bundle_bits +create_SrcB_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilepro_bundle_bits +create_SrcB_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilepro_bundle_bits +create_Src_SN(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 0); +} + +static __inline tilepro_bundle_bits 
+create_UnOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x1f) << 12); +} + +static __inline tilepro_bundle_bits +create_UnOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x1f)) << 43); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 17); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x3ff)) << 48); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 17); +} + +static __inline tilepro_bundle_bits +create_UnShOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilepro_bundle_bits)(n & 0x7)) << 48); +} + + +enum +{ + ADDBS_U_SPECIAL_0_OPCODE_X0 = 98, + ADDBS_U_SPECIAL_0_OPCODE_X1 = 68, + ADDB_SPECIAL_0_OPCODE_X0 = 1, + ADDB_SPECIAL_0_OPCODE_X1 = 1, + ADDHS_SPECIAL_0_OPCODE_X0 = 99, + ADDHS_SPECIAL_0_OPCODE_X1 = 69, + ADDH_SPECIAL_0_OPCODE_X0 = 2, + ADDH_SPECIAL_0_OPCODE_X1 = 2, + ADDIB_IMM_0_OPCODE_X0 = 1, + ADDIB_IMM_0_OPCODE_X1 = 1, + ADDIH_IMM_0_OPCODE_X0 = 2, + ADDIH_IMM_0_OPCODE_X1 = 2, + ADDI_IMM_0_OPCODE_X0 = 3, + ADDI_IMM_0_OPCODE_X1 = 3, + ADDI_IMM_1_OPCODE_SN = 1, + ADDI_OPCODE_Y0 = 9, + ADDI_OPCODE_Y1 = 7, + ADDLIS_OPCODE_X0 = 1, + ADDLIS_OPCODE_X1 = 2, + ADDLI_OPCODE_X0 = 2, + ADDLI_OPCODE_X1 = 3, + ADDS_SPECIAL_0_OPCODE_X0 = 96, + ADDS_SPECIAL_0_OPCODE_X1 = 66, + 
ADD_SPECIAL_0_OPCODE_X0 = 3, + ADD_SPECIAL_0_OPCODE_X1 = 3, + ADD_SPECIAL_0_OPCODE_Y0 = 0, + ADD_SPECIAL_0_OPCODE_Y1 = 0, + ADIFFB_U_SPECIAL_0_OPCODE_X0 = 4, + ADIFFH_SPECIAL_0_OPCODE_X0 = 5, + ANDI_IMM_0_OPCODE_X0 = 1, + ANDI_IMM_0_OPCODE_X1 = 4, + ANDI_OPCODE_Y0 = 10, + ANDI_OPCODE_Y1 = 8, + AND_SPECIAL_0_OPCODE_X0 = 6, + AND_SPECIAL_0_OPCODE_X1 = 4, + AND_SPECIAL_2_OPCODE_Y0 = 0, + AND_SPECIAL_2_OPCODE_Y1 = 0, + AULI_OPCODE_X0 = 3, + AULI_OPCODE_X1 = 4, + AVGB_U_SPECIAL_0_OPCODE_X0 = 7, + AVGH_SPECIAL_0_OPCODE_X0 = 8, + BBNST_BRANCH_OPCODE_X1 = 15, + BBNS_BRANCH_OPCODE_X1 = 14, + BBNS_OPCODE_SN = 63, + BBST_BRANCH_OPCODE_X1 = 13, + BBS_BRANCH_OPCODE_X1 = 12, + BBS_OPCODE_SN = 62, + BGEZT_BRANCH_OPCODE_X1 = 7, + BGEZ_BRANCH_OPCODE_X1 = 6, + BGEZ_OPCODE_SN = 61, + BGZT_BRANCH_OPCODE_X1 = 5, + BGZ_BRANCH_OPCODE_X1 = 4, + BGZ_OPCODE_SN = 58, + BITX_UN_0_SHUN_0_OPCODE_X0 = 1, + BITX_UN_0_SHUN_0_OPCODE_Y0 = 1, + BLEZT_BRANCH_OPCODE_X1 = 11, + BLEZ_BRANCH_OPCODE_X1 = 10, + BLEZ_OPCODE_SN = 59, + BLZT_BRANCH_OPCODE_X1 = 9, + BLZ_BRANCH_OPCODE_X1 = 8, + BLZ_OPCODE_SN = 60, + BNZT_BRANCH_OPCODE_X1 = 3, + BNZ_BRANCH_OPCODE_X1 = 2, + BNZ_OPCODE_SN = 57, + BPT_NOREG_RR_IMM_0_OPCODE_SN = 1, + BRANCH_OPCODE_X1 = 5, + BYTEX_UN_0_SHUN_0_OPCODE_X0 = 2, + BYTEX_UN_0_SHUN_0_OPCODE_Y0 = 2, + BZT_BRANCH_OPCODE_X1 = 1, + BZ_BRANCH_OPCODE_X1 = 0, + BZ_OPCODE_SN = 56, + CLZ_UN_0_SHUN_0_OPCODE_X0 = 3, + CLZ_UN_0_SHUN_0_OPCODE_Y0 = 3, + CRC32_32_SPECIAL_0_OPCODE_X0 = 9, + CRC32_8_SPECIAL_0_OPCODE_X0 = 10, + CTZ_UN_0_SHUN_0_OPCODE_X0 = 4, + CTZ_UN_0_SHUN_0_OPCODE_Y0 = 4, + DRAIN_UN_0_SHUN_0_OPCODE_X1 = 1, + DTLBPR_UN_0_SHUN_0_OPCODE_X1 = 2, + DWORD_ALIGN_SPECIAL_0_OPCODE_X0 = 95, + FINV_UN_0_SHUN_0_OPCODE_X1 = 3, + FLUSH_UN_0_SHUN_0_OPCODE_X1 = 4, + FNOP_NOREG_RR_IMM_0_OPCODE_SN = 3, + FNOP_UN_0_SHUN_0_OPCODE_X0 = 5, + FNOP_UN_0_SHUN_0_OPCODE_X1 = 5, + FNOP_UN_0_SHUN_0_OPCODE_Y0 = 5, + FNOP_UN_0_SHUN_0_OPCODE_Y1 = 1, + HALT_NOREG_RR_IMM_0_OPCODE_SN = 0, + ICOH_UN_0_SHUN_0_OPCODE_X1 = 6, + 
ILL_UN_0_SHUN_0_OPCODE_X1 = 7, + ILL_UN_0_SHUN_0_OPCODE_Y1 = 2, + IMM_0_OPCODE_SN = 0, + IMM_0_OPCODE_X0 = 4, + IMM_0_OPCODE_X1 = 6, + IMM_1_OPCODE_SN = 1, + IMM_OPCODE_0_X0 = 5, + INTHB_SPECIAL_0_OPCODE_X0 = 11, + INTHB_SPECIAL_0_OPCODE_X1 = 5, + INTHH_SPECIAL_0_OPCODE_X0 = 12, + INTHH_SPECIAL_0_OPCODE_X1 = 6, + INTLB_SPECIAL_0_OPCODE_X0 = 13, + INTLB_SPECIAL_0_OPCODE_X1 = 7, + INTLH_SPECIAL_0_OPCODE_X0 = 14, + INTLH_SPECIAL_0_OPCODE_X1 = 8, + INV_UN_0_SHUN_0_OPCODE_X1 = 8, + IRET_UN_0_SHUN_0_OPCODE_X1 = 9, + JALB_OPCODE_X1 = 13, + JALF_OPCODE_X1 = 12, + JALRP_SPECIAL_0_OPCODE_X1 = 9, + JALRR_IMM_1_OPCODE_SN = 3, + JALR_RR_IMM_0_OPCODE_SN = 5, + JALR_SPECIAL_0_OPCODE_X1 = 10, + JB_OPCODE_X1 = 11, + JF_OPCODE_X1 = 10, + JRP_SPECIAL_0_OPCODE_X1 = 11, + JRR_IMM_1_OPCODE_SN = 2, + JR_RR_IMM_0_OPCODE_SN = 4, + JR_SPECIAL_0_OPCODE_X1 = 12, + LBADD_IMM_0_OPCODE_X1 = 22, + LBADD_U_IMM_0_OPCODE_X1 = 23, + LB_OPCODE_Y2 = 0, + LB_UN_0_SHUN_0_OPCODE_X1 = 10, + LB_U_OPCODE_Y2 = 1, + LB_U_UN_0_SHUN_0_OPCODE_X1 = 11, + LHADD_IMM_0_OPCODE_X1 = 24, + LHADD_U_IMM_0_OPCODE_X1 = 25, + LH_OPCODE_Y2 = 2, + LH_UN_0_SHUN_0_OPCODE_X1 = 12, + LH_U_OPCODE_Y2 = 3, + LH_U_UN_0_SHUN_0_OPCODE_X1 = 13, + LNK_SPECIAL_0_OPCODE_X1 = 13, + LWADD_IMM_0_OPCODE_X1 = 26, + LWADD_NA_IMM_0_OPCODE_X1 = 27, + LW_NA_UN_0_SHUN_0_OPCODE_X1 = 24, + LW_OPCODE_Y2 = 4, + LW_UN_0_SHUN_0_OPCODE_X1 = 14, + MAXB_U_SPECIAL_0_OPCODE_X0 = 15, + MAXB_U_SPECIAL_0_OPCODE_X1 = 14, + MAXH_SPECIAL_0_OPCODE_X0 = 16, + MAXH_SPECIAL_0_OPCODE_X1 = 15, + MAXIB_U_IMM_0_OPCODE_X0 = 4, + MAXIB_U_IMM_0_OPCODE_X1 = 5, + MAXIH_IMM_0_OPCODE_X0 = 5, + MAXIH_IMM_0_OPCODE_X1 = 6, + MFSPR_IMM_0_OPCODE_X1 = 7, + MF_UN_0_SHUN_0_OPCODE_X1 = 15, + MINB_U_SPECIAL_0_OPCODE_X0 = 17, + MINB_U_SPECIAL_0_OPCODE_X1 = 16, + MINH_SPECIAL_0_OPCODE_X0 = 18, + MINH_SPECIAL_0_OPCODE_X1 = 17, + MINIB_U_IMM_0_OPCODE_X0 = 6, + MINIB_U_IMM_0_OPCODE_X1 = 8, + MINIH_IMM_0_OPCODE_X0 = 7, + MINIH_IMM_0_OPCODE_X1 = 9, + MM_OPCODE_X0 = 6, + MM_OPCODE_X1 = 7, + 
MNZB_SPECIAL_0_OPCODE_X0 = 19, + MNZB_SPECIAL_0_OPCODE_X1 = 18, + MNZH_SPECIAL_0_OPCODE_X0 = 20, + MNZH_SPECIAL_0_OPCODE_X1 = 19, + MNZ_SPECIAL_0_OPCODE_X0 = 21, + MNZ_SPECIAL_0_OPCODE_X1 = 20, + MNZ_SPECIAL_1_OPCODE_Y0 = 0, + MNZ_SPECIAL_1_OPCODE_Y1 = 1, + MOVEI_IMM_1_OPCODE_SN = 0, + MOVE_RR_IMM_0_OPCODE_SN = 8, + MTSPR_IMM_0_OPCODE_X1 = 10, + MULHHA_SS_SPECIAL_0_OPCODE_X0 = 22, + MULHHA_SS_SPECIAL_7_OPCODE_Y0 = 0, + MULHHA_SU_SPECIAL_0_OPCODE_X0 = 23, + MULHHA_UU_SPECIAL_0_OPCODE_X0 = 24, + MULHHA_UU_SPECIAL_7_OPCODE_Y0 = 1, + MULHHSA_UU_SPECIAL_0_OPCODE_X0 = 25, + MULHH_SS_SPECIAL_0_OPCODE_X0 = 26, + MULHH_SS_SPECIAL_6_OPCODE_Y0 = 0, + MULHH_SU_SPECIAL_0_OPCODE_X0 = 27, + MULHH_UU_SPECIAL_0_OPCODE_X0 = 28, + MULHH_UU_SPECIAL_6_OPCODE_Y0 = 1, + MULHLA_SS_SPECIAL_0_OPCODE_X0 = 29, + MULHLA_SU_SPECIAL_0_OPCODE_X0 = 30, + MULHLA_US_SPECIAL_0_OPCODE_X0 = 31, + MULHLA_UU_SPECIAL_0_OPCODE_X0 = 32, + MULHLSA_UU_SPECIAL_0_OPCODE_X0 = 33, + MULHLSA_UU_SPECIAL_5_OPCODE_Y0 = 0, + MULHL_SS_SPECIAL_0_OPCODE_X0 = 34, + MULHL_SU_SPECIAL_0_OPCODE_X0 = 35, + MULHL_US_SPECIAL_0_OPCODE_X0 = 36, + MULHL_UU_SPECIAL_0_OPCODE_X0 = 37, + MULLLA_SS_SPECIAL_0_OPCODE_X0 = 38, + MULLLA_SS_SPECIAL_7_OPCODE_Y0 = 2, + MULLLA_SU_SPECIAL_0_OPCODE_X0 = 39, + MULLLA_UU_SPECIAL_0_OPCODE_X0 = 40, + MULLLA_UU_SPECIAL_7_OPCODE_Y0 = 3, + MULLLSA_UU_SPECIAL_0_OPCODE_X0 = 41, + MULLL_SS_SPECIAL_0_OPCODE_X0 = 42, + MULLL_SS_SPECIAL_6_OPCODE_Y0 = 2, + MULLL_SU_SPECIAL_0_OPCODE_X0 = 43, + MULLL_UU_SPECIAL_0_OPCODE_X0 = 44, + MULLL_UU_SPECIAL_6_OPCODE_Y0 = 3, + MVNZ_SPECIAL_0_OPCODE_X0 = 45, + MVNZ_SPECIAL_1_OPCODE_Y0 = 1, + MVZ_SPECIAL_0_OPCODE_X0 = 46, + MVZ_SPECIAL_1_OPCODE_Y0 = 2, + MZB_SPECIAL_0_OPCODE_X0 = 47, + MZB_SPECIAL_0_OPCODE_X1 = 21, + MZH_SPECIAL_0_OPCODE_X0 = 48, + MZH_SPECIAL_0_OPCODE_X1 = 22, + MZ_SPECIAL_0_OPCODE_X0 = 49, + MZ_SPECIAL_0_OPCODE_X1 = 23, + MZ_SPECIAL_1_OPCODE_Y0 = 3, + MZ_SPECIAL_1_OPCODE_Y1 = 2, + NAP_UN_0_SHUN_0_OPCODE_X1 = 16, + NOP_NOREG_RR_IMM_0_OPCODE_SN = 2, + 
NOP_UN_0_SHUN_0_OPCODE_X0 = 6, + NOP_UN_0_SHUN_0_OPCODE_X1 = 17, + NOP_UN_0_SHUN_0_OPCODE_Y0 = 6, + NOP_UN_0_SHUN_0_OPCODE_Y1 = 3, + NOREG_RR_IMM_0_OPCODE_SN = 0, + NOR_SPECIAL_0_OPCODE_X0 = 50, + NOR_SPECIAL_0_OPCODE_X1 = 24, + NOR_SPECIAL_2_OPCODE_Y0 = 1, + NOR_SPECIAL_2_OPCODE_Y1 = 1, + ORI_IMM_0_OPCODE_X0 = 8, + ORI_IMM_0_OPCODE_X1 = 11, + ORI_OPCODE_Y0 = 11, + ORI_OPCODE_Y1 = 9, + OR_SPECIAL_0_OPCODE_X0 = 51, + OR_SPECIAL_0_OPCODE_X1 = 25, + OR_SPECIAL_2_OPCODE_Y0 = 2, + OR_SPECIAL_2_OPCODE_Y1 = 2, + PACKBS_U_SPECIAL_0_OPCODE_X0 = 103, + PACKBS_U_SPECIAL_0_OPCODE_X1 = 73, + PACKHB_SPECIAL_0_OPCODE_X0 = 52, + PACKHB_SPECIAL_0_OPCODE_X1 = 26, + PACKHS_SPECIAL_0_OPCODE_X0 = 102, + PACKHS_SPECIAL_0_OPCODE_X1 = 72, + PACKLB_SPECIAL_0_OPCODE_X0 = 53, + PACKLB_SPECIAL_0_OPCODE_X1 = 27, + PCNT_UN_0_SHUN_0_OPCODE_X0 = 7, + PCNT_UN_0_SHUN_0_OPCODE_Y0 = 7, + RLI_SHUN_0_OPCODE_X0 = 1, + RLI_SHUN_0_OPCODE_X1 = 1, + RLI_SHUN_0_OPCODE_Y0 = 1, + RLI_SHUN_0_OPCODE_Y1 = 1, + RL_SPECIAL_0_OPCODE_X0 = 54, + RL_SPECIAL_0_OPCODE_X1 = 28, + RL_SPECIAL_3_OPCODE_Y0 = 0, + RL_SPECIAL_3_OPCODE_Y1 = 0, + RR_IMM_0_OPCODE_SN = 0, + S1A_SPECIAL_0_OPCODE_X0 = 55, + S1A_SPECIAL_0_OPCODE_X1 = 29, + S1A_SPECIAL_0_OPCODE_Y0 = 1, + S1A_SPECIAL_0_OPCODE_Y1 = 1, + S2A_SPECIAL_0_OPCODE_X0 = 56, + S2A_SPECIAL_0_OPCODE_X1 = 30, + S2A_SPECIAL_0_OPCODE_Y0 = 2, + S2A_SPECIAL_0_OPCODE_Y1 = 2, + S3A_SPECIAL_0_OPCODE_X0 = 57, + S3A_SPECIAL_0_OPCODE_X1 = 31, + S3A_SPECIAL_5_OPCODE_Y0 = 1, + S3A_SPECIAL_5_OPCODE_Y1 = 1, + SADAB_U_SPECIAL_0_OPCODE_X0 = 58, + SADAH_SPECIAL_0_OPCODE_X0 = 59, + SADAH_U_SPECIAL_0_OPCODE_X0 = 60, + SADB_U_SPECIAL_0_OPCODE_X0 = 61, + SADH_SPECIAL_0_OPCODE_X0 = 62, + SADH_U_SPECIAL_0_OPCODE_X0 = 63, + SBADD_IMM_0_OPCODE_X1 = 28, + SB_OPCODE_Y2 = 5, + SB_SPECIAL_0_OPCODE_X1 = 32, + SEQB_SPECIAL_0_OPCODE_X0 = 64, + SEQB_SPECIAL_0_OPCODE_X1 = 33, + SEQH_SPECIAL_0_OPCODE_X0 = 65, + SEQH_SPECIAL_0_OPCODE_X1 = 34, + SEQIB_IMM_0_OPCODE_X0 = 9, + SEQIB_IMM_0_OPCODE_X1 = 12, + 
SEQIH_IMM_0_OPCODE_X0 = 10, + SEQIH_IMM_0_OPCODE_X1 = 13, + SEQI_IMM_0_OPCODE_X0 = 11, + SEQI_IMM_0_OPCODE_X1 = 14, + SEQI_OPCODE_Y0 = 12, + SEQI_OPCODE_Y1 = 10, + SEQ_SPECIAL_0_OPCODE_X0 = 66, + SEQ_SPECIAL_0_OPCODE_X1 = 35, + SEQ_SPECIAL_5_OPCODE_Y0 = 2, + SEQ_SPECIAL_5_OPCODE_Y1 = 2, + SHADD_IMM_0_OPCODE_X1 = 29, + SHL8II_IMM_0_OPCODE_SN = 3, + SHLB_SPECIAL_0_OPCODE_X0 = 67, + SHLB_SPECIAL_0_OPCODE_X1 = 36, + SHLH_SPECIAL_0_OPCODE_X0 = 68, + SHLH_SPECIAL_0_OPCODE_X1 = 37, + SHLIB_SHUN_0_OPCODE_X0 = 2, + SHLIB_SHUN_0_OPCODE_X1 = 2, + SHLIH_SHUN_0_OPCODE_X0 = 3, + SHLIH_SHUN_0_OPCODE_X1 = 3, + SHLI_SHUN_0_OPCODE_X0 = 4, + SHLI_SHUN_0_OPCODE_X1 = 4, + SHLI_SHUN_0_OPCODE_Y0 = 2, + SHLI_SHUN_0_OPCODE_Y1 = 2, + SHL_SPECIAL_0_OPCODE_X0 = 69, + SHL_SPECIAL_0_OPCODE_X1 = 38, + SHL_SPECIAL_3_OPCODE_Y0 = 1, + SHL_SPECIAL_3_OPCODE_Y1 = 1, + SHR1_RR_IMM_0_OPCODE_SN = 9, + SHRB_SPECIAL_0_OPCODE_X0 = 70, + SHRB_SPECIAL_0_OPCODE_X1 = 39, + SHRH_SPECIAL_0_OPCODE_X0 = 71, + SHRH_SPECIAL_0_OPCODE_X1 = 40, + SHRIB_SHUN_0_OPCODE_X0 = 5, + SHRIB_SHUN_0_OPCODE_X1 = 5, + SHRIH_SHUN_0_OPCODE_X0 = 6, + SHRIH_SHUN_0_OPCODE_X1 = 6, + SHRI_SHUN_0_OPCODE_X0 = 7, + SHRI_SHUN_0_OPCODE_X1 = 7, + SHRI_SHUN_0_OPCODE_Y0 = 3, + SHRI_SHUN_0_OPCODE_Y1 = 3, + SHR_SPECIAL_0_OPCODE_X0 = 72, + SHR_SPECIAL_0_OPCODE_X1 = 41, + SHR_SPECIAL_3_OPCODE_Y0 = 2, + SHR_SPECIAL_3_OPCODE_Y1 = 2, + SHUN_0_OPCODE_X0 = 7, + SHUN_0_OPCODE_X1 = 8, + SHUN_0_OPCODE_Y0 = 13, + SHUN_0_OPCODE_Y1 = 11, + SH_OPCODE_Y2 = 6, + SH_SPECIAL_0_OPCODE_X1 = 42, + SLTB_SPECIAL_0_OPCODE_X0 = 73, + SLTB_SPECIAL_0_OPCODE_X1 = 43, + SLTB_U_SPECIAL_0_OPCODE_X0 = 74, + SLTB_U_SPECIAL_0_OPCODE_X1 = 44, + SLTEB_SPECIAL_0_OPCODE_X0 = 75, + SLTEB_SPECIAL_0_OPCODE_X1 = 45, + SLTEB_U_SPECIAL_0_OPCODE_X0 = 76, + SLTEB_U_SPECIAL_0_OPCODE_X1 = 46, + SLTEH_SPECIAL_0_OPCODE_X0 = 77, + SLTEH_SPECIAL_0_OPCODE_X1 = 47, + SLTEH_U_SPECIAL_0_OPCODE_X0 = 78, + SLTEH_U_SPECIAL_0_OPCODE_X1 = 48, + SLTE_SPECIAL_0_OPCODE_X0 = 79, + SLTE_SPECIAL_0_OPCODE_X1 = 49, + 
SLTE_SPECIAL_4_OPCODE_Y0 = 0, + SLTE_SPECIAL_4_OPCODE_Y1 = 0, + SLTE_U_SPECIAL_0_OPCODE_X0 = 80, + SLTE_U_SPECIAL_0_OPCODE_X1 = 50, + SLTE_U_SPECIAL_4_OPCODE_Y0 = 1, + SLTE_U_SPECIAL_4_OPCODE_Y1 = 1, + SLTH_SPECIAL_0_OPCODE_X0 = 81, + SLTH_SPECIAL_0_OPCODE_X1 = 51, + SLTH_U_SPECIAL_0_OPCODE_X0 = 82, + SLTH_U_SPECIAL_0_OPCODE_X1 = 52, + SLTIB_IMM_0_OPCODE_X0 = 12, + SLTIB_IMM_0_OPCODE_X1 = 15, + SLTIB_U_IMM_0_OPCODE_X0 = 13, + SLTIB_U_IMM_0_OPCODE_X1 = 16, + SLTIH_IMM_0_OPCODE_X0 = 14, + SLTIH_IMM_0_OPCODE_X1 = 17, + SLTIH_U_IMM_0_OPCODE_X0 = 15, + SLTIH_U_IMM_0_OPCODE_X1 = 18, + SLTI_IMM_0_OPCODE_X0 = 16, + SLTI_IMM_0_OPCODE_X1 = 19, + SLTI_OPCODE_Y0 = 14, + SLTI_OPCODE_Y1 = 12, + SLTI_U_IMM_0_OPCODE_X0 = 17, + SLTI_U_IMM_0_OPCODE_X1 = 20, + SLTI_U_OPCODE_Y0 = 15, + SLTI_U_OPCODE_Y1 = 13, + SLT_SPECIAL_0_OPCODE_X0 = 83, + SLT_SPECIAL_0_OPCODE_X1 = 53, + SLT_SPECIAL_4_OPCODE_Y0 = 2, + SLT_SPECIAL_4_OPCODE_Y1 = 2, + SLT_U_SPECIAL_0_OPCODE_X0 = 84, + SLT_U_SPECIAL_0_OPCODE_X1 = 54, + SLT_U_SPECIAL_4_OPCODE_Y0 = 3, + SLT_U_SPECIAL_4_OPCODE_Y1 = 3, + SNEB_SPECIAL_0_OPCODE_X0 = 85, + SNEB_SPECIAL_0_OPCODE_X1 = 55, + SNEH_SPECIAL_0_OPCODE_X0 = 86, + SNEH_SPECIAL_0_OPCODE_X1 = 56, + SNE_SPECIAL_0_OPCODE_X0 = 87, + SNE_SPECIAL_0_OPCODE_X1 = 57, + SNE_SPECIAL_5_OPCODE_Y0 = 3, + SNE_SPECIAL_5_OPCODE_Y1 = 3, + SPECIAL_0_OPCODE_X0 = 0, + SPECIAL_0_OPCODE_X1 = 1, + SPECIAL_0_OPCODE_Y0 = 1, + SPECIAL_0_OPCODE_Y1 = 1, + SPECIAL_1_OPCODE_Y0 = 2, + SPECIAL_1_OPCODE_Y1 = 2, + SPECIAL_2_OPCODE_Y0 = 3, + SPECIAL_2_OPCODE_Y1 = 3, + SPECIAL_3_OPCODE_Y0 = 4, + SPECIAL_3_OPCODE_Y1 = 4, + SPECIAL_4_OPCODE_Y0 = 5, + SPECIAL_4_OPCODE_Y1 = 5, + SPECIAL_5_OPCODE_Y0 = 6, + SPECIAL_5_OPCODE_Y1 = 6, + SPECIAL_6_OPCODE_Y0 = 7, + SPECIAL_7_OPCODE_Y0 = 8, + SRAB_SPECIAL_0_OPCODE_X0 = 88, + SRAB_SPECIAL_0_OPCODE_X1 = 58, + SRAH_SPECIAL_0_OPCODE_X0 = 89, + SRAH_SPECIAL_0_OPCODE_X1 = 59, + SRAIB_SHUN_0_OPCODE_X0 = 8, + SRAIB_SHUN_0_OPCODE_X1 = 8, + SRAIH_SHUN_0_OPCODE_X0 = 9, + SRAIH_SHUN_0_OPCODE_X1 = 
9, + SRAI_SHUN_0_OPCODE_X0 = 10, + SRAI_SHUN_0_OPCODE_X1 = 10, + SRAI_SHUN_0_OPCODE_Y0 = 4, + SRAI_SHUN_0_OPCODE_Y1 = 4, + SRA_SPECIAL_0_OPCODE_X0 = 90, + SRA_SPECIAL_0_OPCODE_X1 = 60, + SRA_SPECIAL_3_OPCODE_Y0 = 3, + SRA_SPECIAL_3_OPCODE_Y1 = 3, + SUBBS_U_SPECIAL_0_OPCODE_X0 = 100, + SUBBS_U_SPECIAL_0_OPCODE_X1 = 70, + SUBB_SPECIAL_0_OPCODE_X0 = 91, + SUBB_SPECIAL_0_OPCODE_X1 = 61, + SUBHS_SPECIAL_0_OPCODE_X0 = 101, + SUBHS_SPECIAL_0_OPCODE_X1 = 71, + SUBH_SPECIAL_0_OPCODE_X0 = 92, + SUBH_SPECIAL_0_OPCODE_X1 = 62, + SUBS_SPECIAL_0_OPCODE_X0 = 97, + SUBS_SPECIAL_0_OPCODE_X1 = 67, + SUB_SPECIAL_0_OPCODE_X0 = 93, + SUB_SPECIAL_0_OPCODE_X1 = 63, + SUB_SPECIAL_0_OPCODE_Y0 = 3, + SUB_SPECIAL_0_OPCODE_Y1 = 3, + SWADD_IMM_0_OPCODE_X1 = 30, + SWINT0_UN_0_SHUN_0_OPCODE_X1 = 18, + SWINT1_UN_0_SHUN_0_OPCODE_X1 = 19, + SWINT2_UN_0_SHUN_0_OPCODE_X1 = 20, + SWINT3_UN_0_SHUN_0_OPCODE_X1 = 21, + SW_OPCODE_Y2 = 7, + SW_SPECIAL_0_OPCODE_X1 = 64, + TBLIDXB0_UN_0_SHUN_0_OPCODE_X0 = 8, + TBLIDXB0_UN_0_SHUN_0_OPCODE_Y0 = 8, + TBLIDXB1_UN_0_SHUN_0_OPCODE_X0 = 9, + TBLIDXB1_UN_0_SHUN_0_OPCODE_Y0 = 9, + TBLIDXB2_UN_0_SHUN_0_OPCODE_X0 = 10, + TBLIDXB2_UN_0_SHUN_0_OPCODE_Y0 = 10, + TBLIDXB3_UN_0_SHUN_0_OPCODE_X0 = 11, + TBLIDXB3_UN_0_SHUN_0_OPCODE_Y0 = 11, + TNS_UN_0_SHUN_0_OPCODE_X1 = 22, + UN_0_SHUN_0_OPCODE_X0 = 11, + UN_0_SHUN_0_OPCODE_X1 = 11, + UN_0_SHUN_0_OPCODE_Y0 = 5, + UN_0_SHUN_0_OPCODE_Y1 = 5, + WH64_UN_0_SHUN_0_OPCODE_X1 = 23, + XORI_IMM_0_OPCODE_X0 = 2, + XORI_IMM_0_OPCODE_X1 = 21, + XOR_SPECIAL_0_OPCODE_X0 = 94, + XOR_SPECIAL_0_OPCODE_X1 = 65, + XOR_SPECIAL_2_OPCODE_Y0 = 3, + XOR_SPECIAL_2_OPCODE_Y1 = 3 +}; + + +#endif /* __ASSEMBLER__ */ + +#endif /* __ARCH_OPCODE_H__ */ diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/uapi/arch/sim.h index e54b7b0527f..e54b7b0527f 100644 --- a/arch/tile/include/arch/sim.h +++ b/arch/tile/include/uapi/arch/sim.h diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/uapi/arch/sim_def.h index 4b44a2b6a09..4b44a2b6a09 
100644 --- a/arch/tile/include/arch/sim_def.h +++ b/arch/tile/include/uapi/arch/sim_def.h diff --git a/arch/tile/include/asm/opcode_constants.h b/arch/tile/include/uapi/arch/spr_def.h index 37a9f2958cb..c250c5adb1a 100644 --- a/arch/tile/include/asm/opcode_constants.h +++ b/arch/tile/include/uapi/arch/spr_def.h @@ -12,15 +12,15 @@ * more details. */ -#ifndef _ASM_TILE_OPCODE_CONSTANTS_H -#define _ASM_TILE_OPCODE_CONSTANTS_H +#ifndef _UAPI__ARCH_SPR_DEF_H__ +#define _UAPI__ARCH_SPR_DEF_H__ -#include <arch/chip.h> - -#if CHIP_WORD_SIZE() == 64 -#include <asm/opcode_constants_64.h> +/* Include the proper base SPR definition file. */ +#ifdef __tilegx__ +#include <arch/spr_def_64.h> #else -#include <asm/opcode_constants_32.h> +#include <arch/spr_def_32.h> #endif -#endif /* _ASM_TILE_OPCODE_CONSTANTS_H */ + +#endif /* _UAPI__ARCH_SPR_DEF_H__ */ diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h index bbc1f4c924e..78daa3146d2 100644 --- a/arch/tile/include/arch/spr_def_32.h +++ b/arch/tile/include/uapi/arch/spr_def_32.h @@ -14,8 +14,8 @@ #ifndef __DOXYGEN__ -#ifndef __ARCH_SPR_DEF_H__ -#define __ARCH_SPR_DEF_H__ +#ifndef __ARCH_SPR_DEF_32_H__ +#define __ARCH_SPR_DEF_32_H__ #define SPR_AUX_PERF_COUNT_0 0x6005 #define SPR_AUX_PERF_COUNT_1 0x6006 @@ -65,6 +65,31 @@ #define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 #define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 #define SPR_FAIL 0x4e09 +#define SPR_IDN_AVAIL_EN 0x3e05 +#define SPR_IDN_CA_DATA 0x0b00 +#define SPR_IDN_DATA_AVAIL 0x0b03 +#define SPR_IDN_DEADLOCK_TIMEOUT 0x3406 +#define SPR_IDN_DEMUX_CA_COUNT 0x0a05 +#define SPR_IDN_DEMUX_COUNT_0 0x0a06 +#define SPR_IDN_DEMUX_COUNT_1 0x0a07 +#define SPR_IDN_DEMUX_CTL 0x0a08 +#define SPR_IDN_DEMUX_QUEUE_SEL 0x0a0a +#define SPR_IDN_DEMUX_STATUS 0x0a0b +#define SPR_IDN_DEMUX_WRITE_FIFO 0x0a0c +#define SPR_IDN_DIRECTION_PROTECT 0x2e05 +#define SPR_IDN_PENDING 0x0a0e +#define SPR_IDN_REFILL_EN 0x0e05 +#define SPR_IDN_SP_FIFO_DATA 0x0a0f +#define 
SPR_IDN_SP_FIFO_SEL 0x0a10 +#define SPR_IDN_SP_FREEZE 0x0a11 +#define SPR_IDN_SP_FREEZE__SP_FRZ_MASK 0x1 +#define SPR_IDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2 +#define SPR_IDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4 +#define SPR_IDN_SP_STATE 0x0a12 +#define SPR_IDN_TAG_0 0x0a13 +#define SPR_IDN_TAG_1 0x0a14 +#define SPR_IDN_TAG_VALID 0x0a15 +#define SPR_IDN_TILE_COORD 0x0a16 #define SPR_INTCTRL_0_STATUS 0x4a07 #define SPR_INTCTRL_1_STATUS 0x4807 #define SPR_INTCTRL_2_STATUS 0x4607 @@ -87,12 +112,36 @@ #define SPR_INTERRUPT_MASK_SET_1_1 0x480e #define SPR_INTERRUPT_MASK_SET_2_0 0x460c #define SPR_INTERRUPT_MASK_SET_2_1 0x460d +#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x6000 +#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x6001 +#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x6002 #define SPR_MPL_DMA_CPL_SET_0 0x5800 #define SPR_MPL_DMA_CPL_SET_1 0x5801 #define SPR_MPL_DMA_CPL_SET_2 0x5802 #define SPR_MPL_DMA_NOTIFY_SET_0 0x3800 #define SPR_MPL_DMA_NOTIFY_SET_1 0x3801 #define SPR_MPL_DMA_NOTIFY_SET_2 0x3802 +#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00 +#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01 +#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02 +#define SPR_MPL_IDN_AVAIL_SET_0 0x3e00 +#define SPR_MPL_IDN_AVAIL_SET_1 0x3e01 +#define SPR_MPL_IDN_AVAIL_SET_2 0x3e02 +#define SPR_MPL_IDN_CA_SET_0 0x3a00 +#define SPR_MPL_IDN_CA_SET_1 0x3a01 +#define SPR_MPL_IDN_CA_SET_2 0x3a02 +#define SPR_MPL_IDN_COMPLETE_SET_0 0x1200 +#define SPR_MPL_IDN_COMPLETE_SET_1 0x1201 +#define SPR_MPL_IDN_COMPLETE_SET_2 0x1202 +#define SPR_MPL_IDN_FIREWALL_SET_0 0x2e00 +#define SPR_MPL_IDN_FIREWALL_SET_1 0x2e01 +#define SPR_MPL_IDN_FIREWALL_SET_2 0x2e02 +#define SPR_MPL_IDN_REFILL_SET_0 0x0e00 +#define SPR_MPL_IDN_REFILL_SET_1 0x0e01 +#define SPR_MPL_IDN_REFILL_SET_2 0x0e02 +#define SPR_MPL_IDN_TIMER_SET_0 0x3400 +#define SPR_MPL_IDN_TIMER_SET_1 0x3401 +#define SPR_MPL_IDN_TIMER_SET_2 0x3402 #define SPR_MPL_INTCTRL_0_SET_0 0x4a00 #define SPR_MPL_INTCTRL_0_SET_1 0x4a01 #define SPR_MPL_INTCTRL_0_SET_2 0x4a02 @@ -102,6 +151,9 @@ #define 
SPR_MPL_INTCTRL_2_SET_0 0x4600 #define SPR_MPL_INTCTRL_2_SET_1 0x4601 #define SPR_MPL_INTCTRL_2_SET_2 0x4602 +#define SPR_MPL_PERF_COUNT_SET_0 0x4200 +#define SPR_MPL_PERF_COUNT_SET_1 0x4201 +#define SPR_MPL_PERF_COUNT_SET_2 0x4202 #define SPR_MPL_SN_ACCESS_SET_0 0x0800 #define SPR_MPL_SN_ACCESS_SET_1 0x0801 #define SPR_MPL_SN_ACCESS_SET_2 0x0802 @@ -148,8 +200,6 @@ #define SPR_SIM_CONTROL 0x4e0c #define SPR_SNCTL 0x0805 #define SPR_SNCTL__FRZFABRIC_MASK 0x1 -#define SPR_SNCTL__FRZPROC_MASK 0x2 -#define SPR_SNPC 0x080b #define SPR_SNSTATIC 0x080c #define SPR_SYSTEM_SAVE_0_0 0x4b00 #define SPR_SYSTEM_SAVE_0_1 0x4b01 @@ -181,6 +231,7 @@ #define SPR_UDN_DEMUX_STATUS 0x0c0d #define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e #define SPR_UDN_DIRECTION_PROTECT 0x3005 +#define SPR_UDN_PENDING 0x0c10 #define SPR_UDN_REFILL_EN 0x1005 #define SPR_UDN_SP_FIFO_DATA 0x0c11 #define SPR_UDN_SP_FIFO_SEL 0x0c12 @@ -195,7 +246,10 @@ #define SPR_UDN_TAG_3 0x0c18 #define SPR_UDN_TAG_VALID 0x0c19 #define SPR_UDN_TILE_COORD 0x0c1a +#define SPR_WATCH_CTL 0x4209 +#define SPR_WATCH_MASK 0x420a +#define SPR_WATCH_VAL 0x420b -#endif /* !defined(__ARCH_SPR_DEF_H__) */ +#endif /* !defined(__ARCH_SPR_DEF_32_H__) */ #endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/uapi/arch/spr_def_64.h b/arch/tile/include/uapi/arch/spr_def_64.h new file mode 100644 index 00000000000..67a6c1751e3 --- /dev/null +++ b/arch/tile/include/uapi/arch/spr_def_64.h @@ -0,0 +1,216 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. 
See the GNU General Public License for + * more details. + */ + +#ifndef __DOXYGEN__ + +#ifndef __ARCH_SPR_DEF_64_H__ +#define __ARCH_SPR_DEF_64_H__ + +#define SPR_AUX_PERF_COUNT_0 0x2105 +#define SPR_AUX_PERF_COUNT_1 0x2106 +#define SPR_AUX_PERF_COUNT_CTL 0x2107 +#define SPR_AUX_PERF_COUNT_STS 0x2108 +#define SPR_CMPEXCH_VALUE 0x2780 +#define SPR_CYCLE 0x2781 +#define SPR_DONE 0x2705 +#define SPR_DSTREAM_PF 0x2706 +#define SPR_EVENT_BEGIN 0x2782 +#define SPR_EVENT_END 0x2783 +#define SPR_EX_CONTEXT_0_0 0x2580 +#define SPR_EX_CONTEXT_0_1 0x2581 +#define SPR_EX_CONTEXT_0_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_0_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_0_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_0_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_0_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_0_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_1_0 0x2480 +#define SPR_EX_CONTEXT_1_1 0x2481 +#define SPR_EX_CONTEXT_1_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_1_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_1_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_2_0 0x2380 +#define SPR_EX_CONTEXT_2_1 0x2381 +#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 +#define SPR_FAIL 0x2707 +#define SPR_IDN_AVAIL_EN 0x1a05 +#define SPR_IDN_DATA_AVAIL 0x0a80 +#define SPR_IDN_DEADLOCK_TIMEOUT 0x1806 +#define SPR_IDN_DEMUX_COUNT_0 0x0a05 +#define SPR_IDN_DEMUX_COUNT_1 0x0a06 +#define SPR_IDN_DIRECTION_PROTECT 0x1405 +#define SPR_IDN_PENDING 0x0a08 +#define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1 +#define SPR_INTCTRL_0_STATUS 0x2505 +#define SPR_INTCTRL_1_STATUS 0x2405 +#define SPR_INTCTRL_2_STATUS 0x2305 +#define SPR_INTERRUPT_CRITICAL_SECTION 0x2708 +#define SPR_INTERRUPT_MASK_0 0x2506 +#define SPR_INTERRUPT_MASK_1 
0x2406 +#define SPR_INTERRUPT_MASK_2 0x2306 +#define SPR_INTERRUPT_MASK_RESET_0 0x2507 +#define SPR_INTERRUPT_MASK_RESET_1 0x2407 +#define SPR_INTERRUPT_MASK_RESET_2 0x2307 +#define SPR_INTERRUPT_MASK_SET_0 0x2508 +#define SPR_INTERRUPT_MASK_SET_1 0x2408 +#define SPR_INTERRUPT_MASK_SET_2 0x2308 +#define SPR_INTERRUPT_VECTOR_BASE_0 0x2509 +#define SPR_INTERRUPT_VECTOR_BASE_1 0x2409 +#define SPR_INTERRUPT_VECTOR_BASE_2 0x2309 +#define SPR_INTERRUPT_VECTOR_BASE_3 0x2209 +#define SPR_IPI_EVENT_0 0x1f05 +#define SPR_IPI_EVENT_1 0x1e05 +#define SPR_IPI_EVENT_2 0x1d05 +#define SPR_IPI_EVENT_RESET_0 0x1f06 +#define SPR_IPI_EVENT_RESET_1 0x1e06 +#define SPR_IPI_EVENT_RESET_2 0x1d06 +#define SPR_IPI_EVENT_SET_0 0x1f07 +#define SPR_IPI_EVENT_SET_1 0x1e07 +#define SPR_IPI_EVENT_SET_2 0x1d07 +#define SPR_IPI_MASK_0 0x1f08 +#define SPR_IPI_MASK_1 0x1e08 +#define SPR_IPI_MASK_2 0x1d08 +#define SPR_IPI_MASK_RESET_0 0x1f09 +#define SPR_IPI_MASK_RESET_1 0x1e09 +#define SPR_IPI_MASK_RESET_2 0x1d09 +#define SPR_IPI_MASK_SET_0 0x1f0a +#define SPR_IPI_MASK_SET_1 0x1e0a +#define SPR_IPI_MASK_SET_2 0x1d0a +#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x2100 +#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x2101 +#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x2102 +#define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700 +#define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701 +#define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702 +#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00 +#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01 +#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02 +#define SPR_MPL_IDN_AVAIL_SET_0 0x1a00 +#define SPR_MPL_IDN_AVAIL_SET_1 0x1a01 +#define SPR_MPL_IDN_AVAIL_SET_2 0x1a02 +#define SPR_MPL_IDN_COMPLETE_SET_0 0x0500 +#define SPR_MPL_IDN_COMPLETE_SET_1 0x0501 +#define SPR_MPL_IDN_COMPLETE_SET_2 0x0502 +#define SPR_MPL_IDN_FIREWALL_SET_0 0x1400 +#define SPR_MPL_IDN_FIREWALL_SET_1 0x1401 +#define SPR_MPL_IDN_FIREWALL_SET_2 0x1402 +#define SPR_MPL_IDN_TIMER_SET_0 0x1800 +#define SPR_MPL_IDN_TIMER_SET_1 0x1801 +#define SPR_MPL_IDN_TIMER_SET_2 0x1802 +#define 
SPR_MPL_INTCTRL_0_SET_0 0x2500 +#define SPR_MPL_INTCTRL_0_SET_1 0x2501 +#define SPR_MPL_INTCTRL_0_SET_2 0x2502 +#define SPR_MPL_INTCTRL_1_SET_0 0x2400 +#define SPR_MPL_INTCTRL_1_SET_1 0x2401 +#define SPR_MPL_INTCTRL_1_SET_2 0x2402 +#define SPR_MPL_INTCTRL_2_SET_0 0x2300 +#define SPR_MPL_INTCTRL_2_SET_1 0x2301 +#define SPR_MPL_INTCTRL_2_SET_2 0x2302 +#define SPR_MPL_IPI_0 0x1f04 +#define SPR_MPL_IPI_0_SET_0 0x1f00 +#define SPR_MPL_IPI_0_SET_1 0x1f01 +#define SPR_MPL_IPI_0_SET_2 0x1f02 +#define SPR_MPL_IPI_1 0x1e04 +#define SPR_MPL_IPI_1_SET_0 0x1e00 +#define SPR_MPL_IPI_1_SET_1 0x1e01 +#define SPR_MPL_IPI_1_SET_2 0x1e02 +#define SPR_MPL_IPI_2 0x1d04 +#define SPR_MPL_IPI_2_SET_0 0x1d00 +#define SPR_MPL_IPI_2_SET_1 0x1d01 +#define SPR_MPL_IPI_2_SET_2 0x1d02 +#define SPR_MPL_PERF_COUNT_SET_0 0x2000 +#define SPR_MPL_PERF_COUNT_SET_1 0x2001 +#define SPR_MPL_PERF_COUNT_SET_2 0x2002 +#define SPR_MPL_UDN_ACCESS_SET_0 0x0b00 +#define SPR_MPL_UDN_ACCESS_SET_1 0x0b01 +#define SPR_MPL_UDN_ACCESS_SET_2 0x0b02 +#define SPR_MPL_UDN_AVAIL_SET_0 0x1b00 +#define SPR_MPL_UDN_AVAIL_SET_1 0x1b01 +#define SPR_MPL_UDN_AVAIL_SET_2 0x1b02 +#define SPR_MPL_UDN_COMPLETE_SET_0 0x0600 +#define SPR_MPL_UDN_COMPLETE_SET_1 0x0601 +#define SPR_MPL_UDN_COMPLETE_SET_2 0x0602 +#define SPR_MPL_UDN_FIREWALL_SET_0 0x1500 +#define SPR_MPL_UDN_FIREWALL_SET_1 0x1501 +#define SPR_MPL_UDN_FIREWALL_SET_2 0x1502 +#define SPR_MPL_UDN_TIMER_SET_0 0x1900 +#define SPR_MPL_UDN_TIMER_SET_1 0x1901 +#define SPR_MPL_UDN_TIMER_SET_2 0x1902 +#define SPR_MPL_WORLD_ACCESS_SET_0 0x2700 +#define SPR_MPL_WORLD_ACCESS_SET_1 0x2701 +#define SPR_MPL_WORLD_ACCESS_SET_2 0x2702 +#define SPR_PASS 0x2709 +#define SPR_PERF_COUNT_0 0x2005 +#define SPR_PERF_COUNT_1 0x2006 +#define SPR_PERF_COUNT_CTL 0x2007 +#define SPR_PERF_COUNT_DN_CTL 0x2008 +#define SPR_PERF_COUNT_STS 0x2009 +#define SPR_PROC_STATUS 0x2784 +#define SPR_SIM_CONTROL 0x2785 +#define SPR_SINGLE_STEP_CONTROL_0 0x0405 +#define SPR_SINGLE_STEP_CONTROL_0__CANCELED_MASK 0x1 
+#define SPR_SINGLE_STEP_CONTROL_0__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_CONTROL_1 0x0305 +#define SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK 0x1 +#define SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_CONTROL_2 0x0205 +#define SPR_SINGLE_STEP_CONTROL_2__CANCELED_MASK 0x1 +#define SPR_SINGLE_STEP_CONTROL_2__INHIBIT_MASK 0x2 +#define SPR_SINGLE_STEP_EN_0_0 0x250a +#define SPR_SINGLE_STEP_EN_0_1 0x240a +#define SPR_SINGLE_STEP_EN_0_2 0x230a +#define SPR_SINGLE_STEP_EN_1_0 0x250b +#define SPR_SINGLE_STEP_EN_1_1 0x240b +#define SPR_SINGLE_STEP_EN_1_2 0x230b +#define SPR_SINGLE_STEP_EN_2_0 0x250c +#define SPR_SINGLE_STEP_EN_2_1 0x240c +#define SPR_SINGLE_STEP_EN_2_2 0x230c +#define SPR_SYSTEM_SAVE_0_0 0x2582 +#define SPR_SYSTEM_SAVE_0_1 0x2583 +#define SPR_SYSTEM_SAVE_0_2 0x2584 +#define SPR_SYSTEM_SAVE_0_3 0x2585 +#define SPR_SYSTEM_SAVE_1_0 0x2482 +#define SPR_SYSTEM_SAVE_1_1 0x2483 +#define SPR_SYSTEM_SAVE_1_2 0x2484 +#define SPR_SYSTEM_SAVE_1_3 0x2485 +#define SPR_SYSTEM_SAVE_2_0 0x2382 +#define SPR_SYSTEM_SAVE_2_1 0x2383 +#define SPR_SYSTEM_SAVE_2_2 0x2384 +#define SPR_SYSTEM_SAVE_2_3 0x2385 +#define SPR_TILE_COORD 0x270b +#define SPR_TILE_RTF_HWM 0x270c +#define SPR_TILE_TIMER_CONTROL 0x1605 +#define SPR_UDN_AVAIL_EN 0x1b05 +#define SPR_UDN_DATA_AVAIL 0x0b80 +#define SPR_UDN_DEADLOCK_TIMEOUT 0x1906 +#define SPR_UDN_DEMUX_COUNT_0 0x0b05 +#define SPR_UDN_DEMUX_COUNT_1 0x0b06 +#define SPR_UDN_DEMUX_COUNT_2 0x0b07 +#define SPR_UDN_DEMUX_COUNT_3 0x0b08 +#define SPR_UDN_DIRECTION_PROTECT 0x1505 +#define SPR_UDN_PENDING 0x0b0a +#define SPR_WATCH_MASK 0x200a +#define SPR_WATCH_VAL 0x200b + +#endif /* !defined(__ARCH_SPR_DEF_64_H__) */ + +#endif /* !defined(__DOXYGEN__) */ diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild new file mode 100644 index 00000000000..c20db8e428b --- /dev/null +++ b/arch/tile/include/uapi/asm/Kbuild @@ -0,0 +1,21 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + 
+header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += cachectl.h +header-y += hardwall.h +header-y += kvm_para.h +header-y += mman.h +header-y += ptrace.h +header-y += setup.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += stat.h +header-y += swab.h +header-y += ucontext.h +header-y += unistd.h + +generic-y += ucontext.h diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h index 1d393edb064..c93e92709f1 100644 --- a/arch/tile/include/asm/auxvec.h +++ b/arch/tile/include/uapi/asm/auxvec.h @@ -15,6 +15,7 @@ #ifndef _ASM_TILE_AUXVEC_H #define _ASM_TILE_AUXVEC_H -/* No extensions to auxvec */ +/* The vDSO location. */ +#define AT_SYSINFO_EHDR 33 #endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/uapi/asm/bitsperlong.h index 58c771f2af2..58c771f2af2 100644 --- a/arch/tile/include/asm/bitsperlong.h +++ b/arch/tile/include/uapi/asm/bitsperlong.h diff --git a/arch/tile/include/uapi/asm/byteorder.h b/arch/tile/include/uapi/asm/byteorder.h new file mode 100644 index 00000000000..fb72ecf4921 --- /dev/null +++ b/arch/tile/include/uapi/asm/byteorder.h @@ -0,0 +1,21 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if defined (__BIG_ENDIAN__) +#include <linux/byteorder/big_endian.h> +#elif defined (__LITTLE_ENDIAN__) +#include <linux/byteorder/little_endian.h> +#else +#error "__BIG_ENDIAN__ or __LITTLE_ENDIAN__ must be defined." 
+#endif diff --git a/arch/tile/include/uapi/asm/cachectl.h b/arch/tile/include/uapi/asm/cachectl.h new file mode 100644 index 00000000000..572ddcad209 --- /dev/null +++ b/arch/tile/include/uapi/asm/cachectl.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _ASM_TILE_CACHECTL_H +#define _ASM_TILE_CACHECTL_H + +/* + * Options for cacheflush system call. + * + * The ICACHE flush is performed on all cores currently running the + * current process's address space. The intent is for user + * applications to be able to modify code, invoke the system call, + * then allow arbitrary other threads in the same address space to see + * the newly-modified code. Passing a length of CHIP_L1I_CACHE_SIZE() + * or more invalidates the entire icache on all cores in the address + * spaces. (Note: currently this option invalidates the entire icache + * regardless of the requested address and length, but we may choose + * to honor the arguments at some point.) + * + * Flush and invalidation of memory can normally be performed with the + * __insn_flush() and __insn_finv() instructions from userspace. + * The DCACHE option to the system call allows userspace + * to flush the entire L1+L2 data cache from the core. In this case, + * the address and length arguments are not used. The DCACHE flush is + * restricted to the current core, not all cores in the address space. 
+ */ +#define ICACHE (1<<0) /* invalidate L1 instruction cache */ +#define DCACHE (1<<1) /* flush and invalidate data cache */ +#define BCACHE (ICACHE|DCACHE) /* flush both caches */ + +#endif /* _ASM_TILE_CACHECTL_H */ diff --git a/arch/tile/include/uapi/asm/hardwall.h b/arch/tile/include/uapi/asm/hardwall.h new file mode 100644 index 00000000000..c2169d4f401 --- /dev/null +++ b/arch/tile/include/uapi/asm/hardwall.h @@ -0,0 +1,51 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Provide methods for access control of per-cpu resources like + * UDN, IDN, or IPI. + */ + +#ifndef _UAPI_ASM_TILE_HARDWALL_H +#define _UAPI_ASM_TILE_HARDWALL_H + +#include <arch/chip.h> +#include <linux/ioctl.h> + +#define HARDWALL_IOCTL_BASE 0xa2 + +/* + * The HARDWALL_CREATE() ioctl is a macro with a "size" argument. + * The resulting ioctl value is passed to the kernel in conjunction + * with a pointer to a standard kernel bitmask of cpus. + * For network resources (UDN or IDN) the bitmask must physically + * represent a rectangular configuration on the chip. + * The "size" is the number of bytes of cpu mask data. 
+ */ +#define _HARDWALL_CREATE 1 +#define HARDWALL_CREATE(size) \ + _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size)) + +#define _HARDWALL_ACTIVATE 2 +#define HARDWALL_ACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE) + +#define _HARDWALL_DEACTIVATE 3 +#define HARDWALL_DEACTIVATE \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) + +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + + +#endif /* _UAPI_ASM_TILE_HARDWALL_H */ diff --git a/arch/tile/include/uapi/asm/kvm_para.h b/arch/tile/include/uapi/asm/kvm_para.h new file mode 100644 index 00000000000..14fab8f0b95 --- /dev/null +++ b/arch/tile/include/uapi/asm/kvm_para.h @@ -0,0 +1 @@ +#include <asm-generic/kvm_para.h> diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/uapi/asm/mman.h index 81b8fc348d6..81b8fc348d6 100644 --- a/arch/tile/include/asm/mman.h +++ b/arch/tile/include/uapi/asm/mman.h diff --git a/arch/tile/include/uapi/asm/ptrace.h b/arch/tile/include/uapi/asm/ptrace.h new file mode 100644 index 00000000000..7757e1985fb --- /dev/null +++ b/arch/tile/include/uapi/asm/ptrace.h @@ -0,0 +1,94 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _UAPI_ASM_TILE_PTRACE_H +#define _UAPI_ASM_TILE_PTRACE_H + +#include <arch/chip.h> +#include <arch/abi.h> + +/* These must match struct pt_regs, below. 
*/ +#if CHIP_WORD_SIZE() == 32 +#define PTREGS_OFFSET_REG(n) ((n)*4) +#else +#define PTREGS_OFFSET_REG(n) ((n)*8) +#endif +#define PTREGS_OFFSET_BASE 0 +#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53) +#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54) +#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55) +#define PTREGS_NR_GPRS 56 +#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56) +#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57) +#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58) +#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59) +#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60) +#if CHIP_HAS_CMPEXCH() +#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61) +#endif +#define PTREGS_SIZE PTREGS_OFFSET_REG(64) + + +#ifndef __ASSEMBLY__ + +#ifndef __KERNEL__ +/* Provide appropriate length type to userspace regardless of -m32/-m64. */ +typedef uint_reg_t pt_reg_t; +#endif + +/* + * This struct defines the way the registers are stored on the stack during a + * system call or exception. "struct sigcontext" has the same shape. + */ +struct pt_regs { + /* Saved main processor registers; 56..63 are special. */ + /* tp, sp, and lr must immediately follow regs[] for aliasing. */ + pt_reg_t regs[53]; + pt_reg_t tp; /* aliases regs[TREG_TP] */ + pt_reg_t sp; /* aliases regs[TREG_SP] */ + pt_reg_t lr; /* aliases regs[TREG_LR] */ + + /* Saved special registers. */ + pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ + pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ + pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ + pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ + pt_reg_t flags; /* flags (see below) */ +#if !CHIP_HAS_CMPEXCH() + pt_reg_t pad[3]; +#else + pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */ + pt_reg_t pad[2]; +#endif +}; + +#endif /* __ASSEMBLY__ */ + +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 + +/* Support TILE-specific ptrace options, with events starting at 16. 
*/ +#define PTRACE_EVENT_MIGRATE 16 +#define PTRACE_O_TRACEMIGRATE (1 << PTRACE_EVENT_MIGRATE) + +/* + * Flag bits in pt_regs.flags that are part of the ptrace API. + * We start our numbering higher up to avoid confusion with the + * non-ABI kernel-internal values that use the low 16 bits. + */ +#define PT_FLAGS_COMPAT 0x10000 /* process is an -m32 compat process */ + +#endif /* _UAPI_ASM_TILE_PTRACE_H */ diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/uapi/asm/setup.h index 4fac5fbf333..e6f7da265ac 100644 --- a/arch/tile/include/asm/hw_irq.h +++ b/arch/tile/include/uapi/asm/setup.h @@ -12,7 +12,10 @@ * more details. */ -#ifndef _ASM_TILE_HW_IRQ_H -#define _ASM_TILE_HW_IRQ_H +#ifndef _UAPI_ASM_TILE_SETUP_H +#define _UAPI_ASM_TILE_SETUP_H -#endif /* _ASM_TILE_HW_IRQ_H */ +#define COMMAND_LINE_SIZE 2048 + + +#endif /* _UAPI_ASM_TILE_SETUP_H */ diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/uapi/asm/sigcontext.h index 5e2d03336f5..6348e59d372 100644 --- a/arch/tile/include/asm/sigcontext.h +++ b/arch/tile/include/uapi/asm/sigcontext.h @@ -15,6 +15,8 @@ #ifndef _ASM_TILE_SIGCONTEXT_H #define _ASM_TILE_SIGCONTEXT_H +/* Don't pollute the namespace since <signal.h> includes this file. */ +#define __need_int_reg_t #include <arch/abi.h> /* @@ -22,14 +24,14 @@ * but is simplified since we know the fault is from userspace. */ struct sigcontext { - uint_reg_t gregs[53]; /* General-purpose registers. */ - uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ - uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ - uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ - uint_reg_t pc; /* Program counter. */ - uint_reg_t ics; /* In Interrupt Critical Section? */ - uint_reg_t faultnum; /* Fault number. */ - uint_reg_t pad[5]; + __uint_reg_t gregs[53]; /* General-purpose registers. */ + __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ + __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ + __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ + __uint_reg_t pc; /* Program counter. 
*/ + __uint_reg_t ics; /* In Interrupt Critical Section? */ + __uint_reg_t faultnum; /* Fault number. */ + __uint_reg_t pad[5]; }; #endif /* _ASM_TILE_SIGCONTEXT_H */ diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/uapi/asm/siginfo.h index 56d661bb010..56d661bb010 100644 --- a/arch/tile/include/asm/siginfo.h +++ b/arch/tile/include/uapi/asm/siginfo.h diff --git a/arch/tile/include/asm/opcode-tile.h b/arch/tile/include/uapi/asm/signal.h index ba38959137d..ef0d32d84a4 100644 --- a/arch/tile/include/asm/opcode-tile.h +++ b/arch/tile/include/uapi/asm/signal.h @@ -12,19 +12,16 @@ * more details. */ -#ifndef _ASM_TILE_OPCODE_TILE_H -#define _ASM_TILE_OPCODE_TILE_H +#ifndef _UAPI_ASM_TILE_SIGNAL_H +#define _UAPI_ASM_TILE_SIGNAL_H -#include <arch/chip.h> +/* Do not notify a ptracer when this signal is handled. */ +#define SA_NOPTRACE 0x02000000u -#if CHIP_WORD_SIZE() == 64 -#include <asm/opcode-tile_64.h> -#else -#include <asm/opcode-tile_32.h> -#endif +/* Used in earlier Tilera releases, so keeping for binary compatibility. */ +#define SA_RESTORER 0x04000000u -/* These definitions are not correct for TILE64, so just avoid them. */ -#undef TILE_ELF_MACHINE_CODE -#undef TILE_ELF_NAME +#include <asm-generic/signal.h> -#endif /* _ASM_TILE_OPCODE_TILE_H */ + +#endif /* _UAPI_ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/uapi/asm/stat.h index b16e5db8f0e..c0db34d56be 100644 --- a/arch/tile/include/asm/stat.h +++ b/arch/tile/include/uapi/asm/stat.h @@ -1,4 +1,4 @@ -#ifdef CONFIG_COMPAT +#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */ #endif #include <asm-generic/stat.h> diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/uapi/asm/swab.h index 25c686a00f1..7c37b38f6c8 100644 --- a/arch/tile/include/asm/swab.h +++ b/arch/tile/include/uapi/asm/swab.h @@ -18,12 +18,6 @@ /* Tile gcc is always >= 4.3.0, so we use __builtin_bswap. 
*/ #define __arch_swab32(x) __builtin_bswap32(x) #define __arch_swab64(x) __builtin_bswap64(x) - -/* Use the variant that is natural for the wordsize. */ -#ifdef CONFIG_64BIT -#define __arch_swab16(x) (__builtin_bswap64(x) >> 48) -#else #define __arch_swab16(x) (__builtin_bswap32(x) >> 16) -#endif #endif /* _ASM_TILE_SWAB_H */ diff --git a/arch/tile/include/uapi/asm/unistd.h b/arch/tile/include/uapi/asm/unistd.h new file mode 100644 index 00000000000..3866397aaf5 --- /dev/null +++ b/arch/tile/include/uapi/asm/unistd.h @@ -0,0 +1,36 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#if !defined(__LP64__) || defined(__SYSCALL_COMPAT) +/* Use the flavor of this syscall that matches the 32-bit API better. */ +#define __ARCH_WANT_SYNC_FILE_RANGE2 +#endif + +/* Use the standard ABI for syscalls. */ +#include <asm-generic/unistd.h> + +#define NR_syscalls __NR_syscalls + +/* Additional Tilera-specific syscalls. */ +#define __NR_cacheflush (__NR_arch_specific_syscall + 1) +__SYSCALL(__NR_cacheflush, sys_cacheflush) + +#ifndef __tilegx__ +/* "Fast" syscalls provide atomic support for 32-bit chips. 
*/ +#define __NR_FAST_cmpxchg -1 +#define __NR_FAST_atomic_update -2 +#define __NR_FAST_cmpxchg64 -3 +#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0) +__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) +#endif diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b4c8e8ec45d..21f77bf68c6 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -3,16 +3,34 @@ # extra-y := vmlinux.lds head_$(BITS).o -obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ +obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ - setup.o signal.o single_step.o stack.o sys.o time.o traps.o \ + setup.o signal.o single_step.o stack.o sys.o \ + sysfs.o time.o traps.o unaligned.o vdso.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_early_printk.o = -pg +endif + obj-$(CONFIG_HARDWALL) += hardwall.o -obj-$(CONFIG_TILEGX) += futex_64.o obj-$(CONFIG_COMPAT) += compat.o compat_signal.o obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o +ifdef CONFIG_TILEGX +obj-$(CONFIG_PCI) += pci_gx.o +else obj-$(CONFIG_PCI) += pci.o +endif +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_USE_PMC) += pmc.o +obj-$(CONFIG_TILE_USB) += usb.o +obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KGDB) += kgdb.o + +obj-y += vdso/ diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c index 01ddf19cc36..375e7c321ee 100644 --- a/arch/tile/kernel/asm-offsets.c +++ b/arch/tile/kernel/asm-offsets.c @@ -14,13 +14,6 @@ * Generates definitions from c-type structures used by assembly sources. 
*/ -#include <linux/kbuild.h> -#include <linux/thread_info.h> -#include <linux/sched.h> -#include <linux/hardirq.h> -#include <linux/ptrace.h> -#include <hv/hypervisor.h> - /* Check for compatible compiler early in the build. */ #ifdef CONFIG_TILEGX # ifndef __tilegx__ @@ -31,46 +24,61 @@ # endif #else # ifdef __tilegx__ -# error Can not build TILEPro/TILE64 configurations with tilegx compiler +# error Can not build TILEPro configurations with tilegx compiler # endif #endif +#include <linux/kbuild.h> +#include <linux/thread_info.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <linux/ptrace.h> +#include <hv/hypervisor.h> + void foo(void) { - DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \ + DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, offsetof(struct single_step_state, buffer)); - DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \ + DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, offsetof(struct single_step_state, flags)); - DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, offsetof(struct single_step_state, orig_pc)); - DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, offsetof(struct single_step_state, next_pc)); - DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \ + DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, offsetof(struct single_step_state, branch_next_pc)); - DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \ + DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, offsetof(struct single_step_state, update_value)); - DEFINE(THREAD_INFO_TASK_OFFSET, \ + DEFINE(THREAD_INFO_TASK_OFFSET, offsetof(struct thread_info, task)); - DEFINE(THREAD_INFO_FLAGS_OFFSET, \ + DEFINE(THREAD_INFO_FLAGS_OFFSET, offsetof(struct thread_info, flags)); - DEFINE(THREAD_INFO_STATUS_OFFSET, \ + DEFINE(THREAD_INFO_STATUS_OFFSET, offsetof(struct thread_info, status)); - DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \ + DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, offsetof(struct thread_info, homecache_cpu)); - DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \ + 
DEFINE(THREAD_INFO_PREEMPT_COUNT_OFFSET, + offsetof(struct thread_info, preempt_count)); + DEFINE(THREAD_INFO_STEP_STATE_OFFSET, offsetof(struct thread_info, step_state)); +#ifdef __tilegx__ + DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET, + offsetof(struct thread_info, unalign_jit_base)); + DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET, + offsetof(struct thread_info, unalign_jit_tmp)); +#endif DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, offsetof(struct task_struct, thread.ksp)); DEFINE(TASK_STRUCT_THREAD_PC_OFFSET, offsetof(struct task_struct, thread.pc)); - DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \ + DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, offsetof(HV_Topology, width)); - DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \ + DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, offsetof(HV_Topology, height)); - DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \ + DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, offsetof(irq_cpustat_t, irq_syscall_count)); } diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index 55a6a74974b..f8b74ca83b9 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -1,5 +1,5 @@ /* - * Copyright 2010 Tilera Corporation. All Rights Reserved. + * Copyright 2011 Tilera Corporation. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,22 +14,13 @@ #include <linux/kernel.h> #include <linux/string.h> - +#include <asm/byteorder.h> #include <asm/backtrace.h> +#include <asm/tile-desc.h> +#include <arch/abi.h> -#include <arch/chip.h> - -#include <asm/opcode-tile.h> - - -#define TREG_SP 54 -#define TREG_LR 55 - - -#if TILE_CHIP >= 10 -#define tile_bundle_bits tilegx_bundle_bits +#ifdef __tilegx__ #define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE -#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES #define tile_decoded_instruction tilegx_decoded_instruction #define tile_mnemonic tilegx_mnemonic #define parse_insn_tile parse_insn_tilegx @@ -43,11 +34,22 @@ #define OPCODE_STORE TILEGX_OPC_ST typedef long long bt_int_reg_t; #else -#define OPCODE_STORE TILE_OPC_SW +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE +#define tile_decoded_instruction tilepro_decoded_instruction +#define tile_mnemonic tilepro_mnemonic +#define parse_insn_tile parse_insn_tilepro +#define TILE_OPC_IRET TILEPRO_OPC_IRET +#define TILE_OPC_ADDI TILEPRO_OPC_ADDI +#define TILE_OPC_ADDLI TILEPRO_OPC_ADDLI +#define TILE_OPC_INFO TILEPRO_OPC_INFO +#define TILE_OPC_INFOL TILEPRO_OPC_INFOL +#define TILE_OPC_JRP TILEPRO_OPC_JRP +#define TILE_OPC_MOVE TILEPRO_OPC_MOVE +#define OPCODE_STORE TILEPRO_OPC_SW typedef int bt_int_reg_t; #endif -/** A decoded bundle used for backtracer analysis. */ +/* A decoded bundle used for backtracer analysis. */ struct BacktraceBundle { tile_bundle_bits bits; int num_insns; @@ -56,23 +58,7 @@ struct BacktraceBundle { }; -/* This implementation only makes sense for native tools. */ -/** Default function to read memory. 
*/ -static bool bt_read_memory(void *result, VirtualAddress addr, - unsigned int size, void *extra) -{ - /* FIXME: this should do some horrible signal stuff to catch - * SEGV cleanly and fail. - * - * Or else the caller should do the setjmp for efficiency. - */ - - memcpy(result, (const void *)addr, size); - return true; -} - - -/** Locates an instruction inside the given bundle that +/* Locates an instruction inside the given bundle that * has the specified mnemonic, and whose first 'num_operands_to_match' * operands exactly match those in 'operand_values'. */ @@ -107,13 +93,13 @@ static const struct tile_decoded_instruction *find_matching_insn( return NULL; } -/** Does this bundle contain an 'iret' instruction? */ +/* Does this bundle contain an 'iret' instruction? */ static inline bool bt_has_iret(const struct BacktraceBundle *bundle) { return find_matching_insn(bundle, TILE_OPC_IRET, NULL, 0) != NULL; } -/** Does this bundle contain an 'addi sp, sp, OFFSET' or +/* Does this bundle contain an 'addi sp, sp, OFFSET' or * 'addli sp, sp, OFFSET' instruction, and if so, what is OFFSET? */ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) @@ -124,7 +110,7 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2); if (insn == NULL) insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2); -#if TILE_CHIP >= 10 +#ifdef __tilegx__ if (insn == NULL) insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2); if (insn == NULL) @@ -137,7 +123,7 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) return true; } -/** Does this bundle contain any 'info OP' or 'infol OP' +/* Does this bundle contain any 'info OP' or 'infol OP' * instruction, and if so, what are their OP? Note that OP is interpreted * as an unsigned value by this code since that's what the caller wants. * Returns the number of info ops found. 
@@ -161,7 +147,7 @@ static int bt_get_info_ops(const struct BacktraceBundle *bundle, return num_ops; } -/** Does this bundle contain a jrp instruction, and if so, to which +/* Does this bundle contain a jrp instruction, and if so, to which * register is it jumping? */ static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg) @@ -175,7 +161,7 @@ static bool bt_has_jrp(const struct BacktraceBundle *bundle, int *target_reg) return true; } -/** Does this bundle modify the specified register in any way? */ +/* Does this bundle modify the specified register in any way? */ static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg) { int i, j; @@ -195,34 +181,34 @@ static bool bt_modifies_reg(const struct BacktraceBundle *bundle, int reg) return false; } -/** Does this bundle modify sp? */ +/* Does this bundle modify sp? */ static inline bool bt_modifies_sp(const struct BacktraceBundle *bundle) { return bt_modifies_reg(bundle, TREG_SP); } -/** Does this bundle modify lr? */ +/* Does this bundle modify lr? */ static inline bool bt_modifies_lr(const struct BacktraceBundle *bundle) { return bt_modifies_reg(bundle, TREG_LR); } -/** Does this bundle contain the instruction 'move fp, sp'? */ +/* Does this bundle contain the instruction 'move fp, sp'? */ static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle) { static const int vals[2] = { 52, TREG_SP }; return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL; } -/** Does this bundle contain a store of lr to sp? */ +/* Does this bundle contain a store of lr to sp? */ static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle) { static const int vals[2] = { TREG_SP, TREG_LR }; return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL; } -#if TILE_CHIP >= 10 -/** Track moveli values placed into registers. */ +#ifdef __tilegx__ +/* Track moveli values placed into registers. 
*/ static inline void bt_update_moveli(const struct BacktraceBundle *bundle, int moveli_args[]) { @@ -238,7 +224,7 @@ static inline void bt_update_moveli(const struct BacktraceBundle *bundle, } } -/** Does this bundle contain an 'add sp, sp, reg' instruction +/* Does this bundle contain an 'add sp, sp, reg' instruction * from a register that we saw a moveli into, and if so, what * is the value in the register? */ @@ -260,11 +246,11 @@ static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust, } #endif -/** Locates the caller's PC and SP for a program starting at the +/* Locates the caller's PC and SP for a program starting at the * given address. */ static void find_caller_pc_and_caller_sp(CallerLocation *location, - const VirtualAddress start_pc, + const unsigned long start_pc, BacktraceMemoryReader read_memory_func, void *read_memory_func_extra) { @@ -288,9 +274,9 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, tile_bundle_bits prefetched_bundles[32]; int num_bundles_prefetched = 0; int next_bundle = 0; - VirtualAddress pc; + unsigned long pc; -#if TILE_CHIP >= 10 +#ifdef __tilegx__ /* Naively try to track moveli values to support addx for -m32. */ int moveli_args[TILEGX_NUM_REGISTERS] = { 0 }; #endif @@ -351,8 +337,12 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, bytes_to_prefetch / sizeof(tile_bundle_bits); } - /* Decode the next bundle. */ - bundle.bits = prefetched_bundles[next_bundle++]; + /* + * Decode the next bundle. + * TILE always stores instruction bundles in little-endian + * mode, even when the chip is running in big-endian mode. + */ + bundle.bits = le64_to_cpu(prefetched_bundles[next_bundle++]); bundle.num_insns = parse_insn_tile(bundle.bits, pc, bundle.insns); num_info_ops = bt_get_info_ops(&bundle, info_operands); @@ -369,10 +359,6 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, /* Weird; reserved value, ignore it. 
*/ continue; } - if (info_operand & ENTRY_POINT_INFO_OP) { - /* This info op is ignored by the backtracer. */ - continue; - } /* Skip info ops which are not in the * "one_ago" mode we want right now. @@ -453,7 +439,7 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, if (!sp_determined) { int adjust; if (bt_has_addi_sp(&bundle, &adjust) -#if TILE_CHIP >= 10 +#ifdef __tilegx__ || bt_has_add_sp(&bundle, &adjust, moveli_args) #endif ) { @@ -504,7 +490,7 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, } } -#if TILE_CHIP >= 10 +#ifdef __tilegx__ /* Track moveli arguments for -m32 mode. */ bt_update_moveli(&bundle, moveli_args); #endif @@ -546,18 +532,26 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, } } +/* Initializes a backtracer to start from the given location. + * + * If the frame pointer cannot be determined it is set to -1. + * + * state: The state to be filled in. + * read_memory_func: A callback that reads memory. + * read_memory_func_extra: An arbitrary argument to read_memory_func. + * pc: The current PC. + * lr: The current value of the 'lr' register. + * sp: The current value of the 'sp' register. + * r52: The current value of the 'r52' register. + */ void backtrace_init(BacktraceIterator *state, BacktraceMemoryReader read_memory_func, void *read_memory_func_extra, - VirtualAddress pc, VirtualAddress lr, - VirtualAddress sp, VirtualAddress r52) + unsigned long pc, unsigned long lr, + unsigned long sp, unsigned long r52) { CallerLocation location; - VirtualAddress fp, initial_frame_caller_pc; - - if (read_memory_func == NULL) { - read_memory_func = bt_read_memory; - } + unsigned long fp, initial_frame_caller_pc; /* Find out where we are in the initial frame. */ find_caller_pc_and_caller_sp(&location, pc, @@ -630,12 +624,15 @@ void backtrace_init(BacktraceIterator *state, /* Handle the case where the register holds more bits than the VA. 
*/ static bool valid_addr_reg(bt_int_reg_t reg) { - return ((VirtualAddress)reg == reg); + return ((unsigned long)reg == reg); } +/* Advances the backtracing state to the calling frame, returning + * true iff successful. + */ bool backtrace_next(BacktraceIterator *state) { - VirtualAddress next_fp, next_pc; + unsigned long next_fp, next_pc; bt_int_reg_t next_frame[2]; if (state->fp == -1) { diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index dbc213adf5e..49120843ff9 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -16,7 +16,6 @@ #define __SYSCALL_COMPAT #include <linux/compat.h> -#include <linux/msg.h> #include <linux/syscalls.h> #include <linux/kdev_t.h> #include <linux/fs.h> @@ -33,132 +32,69 @@ * adapt the usual convention. */ -long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy, + u32, low, u32, high) { return sys_truncate(filename, ((loff_t)high << 32) | low); } -long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy, + u32, low, u32, high) { return sys_ftruncate(fd, ((loff_t)high << 32) | low); } -long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count, - u32 dummy, u32 low, u32 high) +COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf, + size_t, count, u32, dummy, u32, low, u32, high) { return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); } -long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len) -{ - return sys_lookup_dcookie(((loff_t)high << 32) | low, buf, len); 
-} - -long compat_sys_sync_file_range2(int fd, unsigned int flags, - u32 offset_lo, u32 offset_hi, - u32 nbytes_lo, u32 nbytes_hi) +COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, + u32, offset_lo, u32, offset_hi, + u32, nbytes_lo, u32, nbytes_hi) { return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)nbytes_hi << 32) | nbytes_lo, flags); } -long compat_sys_fallocate(int fd, int mode, - u32 offset_lo, u32 offset_hi, - u32 len_lo, u32 len_hi) +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, + u32, offset_lo, u32, offset_hi, + u32, len_lo, u32, len_hi) { return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo); } - - -long compat_sys_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, - (struct timespec __force __user *)&t); - set_fs(old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - /* - * The usual compat_sys_msgsnd() and _msgrcv() seem to be assuming - * some different calling convention than our normal 32-bit tile code. + * Avoid bug in generic sys_llseek() that specifies offset_high and + * offset_low as "unsigned long", thus making it possible to pass + * a sign-extended high 32 bits in offset_low. */ - -/* Already defined in ipc/compat.c, but we need it here. 
*/ -struct compat_msgbuf { - compat_long_t mtype; - char mtext[1]; -}; - -long tile_compat_sys_msgsnd(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, int msgflg) -{ - compat_long_t mtype; - - if (get_user(mtype, &msgp->mtype)) - return -EFAULT; - return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); -} - -long tile_compat_sys_msgrcv(int msqid, - struct compat_msgbuf __user *msgp, - size_t msgsz, long msgtyp, int msgflg) +COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, + unsigned int, offset_low, loff_t __user *, result, + unsigned int, origin) { - long err, mtype; - - err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); - if (err < 0) - goto out; - - if (put_user(mtype, &msgp->mtype)) - err = -EFAULT; - out: - return err; + return sys_llseek(fd, offset_high, offset_low, result, origin); } /* Provide the compat syscall number to call mapping. */ #undef __SYSCALL -#define __SYSCALL(nr, call) [nr] = (compat_##call), - -/* The generic versions of these don't work for Tile. */ -#define compat_sys_msgrcv tile_compat_sys_msgrcv -#define compat_sys_msgsnd tile_compat_sys_msgsnd +#define __SYSCALL(nr, call) [nr] = (call), /* See comments in sys.c */ -#define compat_sys_fadvise64 sys32_fadvise64 #define compat_sys_fadvise64_64 sys32_fadvise64_64 #define compat_sys_readahead sys32_readahead -#define compat_sys_sync_file_range compat_sys_sync_file_range2 - -/* We leverage the "struct stat64" type for 32-bit time_t/nsec. */ -#define compat_sys_stat64 sys_stat64 -#define compat_sys_lstat64 sys_lstat64 -#define compat_sys_fstat64 sys_fstat64 -#define compat_sys_fstatat64 sys_fstatat64 - -/* The native sys_ptrace dynamically handles compat binaries. */ -#define compat_sys_ptrace sys_ptrace +#define sys_llseek compat_sys_llseek -/* Call the trampolines to manage pt_regs where necessary. 
*/ -#define compat_sys_execve _compat_sys_execve -#define compat_sys_sigaltstack _compat_sys_sigaltstack +/* Call the assembly trampolines where necessary. */ #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn #define sys_clone _sys_clone diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index dbb0dfc7bec..19c04b5ce40 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -32,21 +32,9 @@ #include <asm/ucontext.h> #include <asm/sigframe.h> #include <asm/syscalls.h> +#include <asm/vdso.h> #include <arch/interrupts.h> -struct compat_sigaction { - compat_uptr_t sa_handler; - compat_ulong_t sa_flags; - compat_uptr_t sa_restorer; - sigset_t sa_mask __packed; -}; - -struct compat_sigaltstack { - compat_uptr_t ss_sp; - int ss_flags; - compat_size_t ss_size; -}; - struct compat_ucontext { compat_ulong_t uc_flags; compat_uptr_t uc_link; @@ -55,129 +43,13 @@ struct compat_ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int)) - -struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[COMPAT_SI_PAD_SIZE]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - 
struct { - unsigned int _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -}; - struct compat_rt_sigframe { unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */ struct compat_siginfo info; struct compat_ucontext uc; }; -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, - struct compat_sigaction __user *oact, - size_t sigsetsize) -{ - struct k_sigaction new_sa, old_sa; - int ret = -EINVAL; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - goto out; - - if (act) { - compat_uptr_t handler, restorer; - - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(handler, &act->sa_handler) || - __get_user(new_sa.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer) || - __copy_from_user(&new_sa.sa.sa_mask, &act->sa_mask, - sizeof(sigset_t))) - return -EFAULT; - new_sa.sa.sa_handler = compat_ptr(handler); - new_sa.sa.sa_restorer = compat_ptr(restorer); - } - - ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? 
&old_sa : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_sa.sa.sa_handler), - &oact->sa_handler) || - __put_user(ptr_to_compat(old_sa.sa.sa_restorer), - &oact->sa_restorer) || - __put_user(old_sa.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &old_sa.sa.sa_mask, - sizeof(sigset_t))) - return -EFAULT; - } -out: - return ret; -} - -long compat_sys_rt_sigqueueinfo(int pid, int sig, - struct compat_siginfo __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - if (copy_siginfo_from_user32(&info, uinfo)) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force __user *)&info); - set_fs(old_fs); - return ret; -} - -int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from) +int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from) { int err; @@ -255,44 +127,10 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) return err; } -long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *regs) -{ - stack_t uss, uoss; - int ret; - mm_segment_t seg; - - if (uss_ptr) { - u32 ptr; - - memset(&uss, 0, sizeof(stack_t)); - if (!access_ok(VERIFY_READ, uss_ptr, sizeof(*uss_ptr)) || - __get_user(ptr, &uss_ptr->ss_sp) || - __get_user(uss.ss_flags, &uss_ptr->ss_flags) || - __get_user(uss.ss_size, &uss_ptr->ss_size)) - return -EFAULT; - uss.ss_sp = compat_ptr(ptr); - } - seg = get_fs(); - set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? 
(stack_t __user __force *)&uss : NULL, - (stack_t __user __force *)&uoss, - (unsigned long)compat_ptr(regs->sp)); - set_fs(seg); - if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) || - __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || - __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || - __put_user(uoss.ss_size, &uoss_ptr->ss_size)) - ret = -EFAULT; - } - return ret; -} - /* The assembly shim for this function arranges to ignore the return value. */ -long compat_sys_rt_sigreturn(struct pt_regs *regs) +long compat_sys_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct compat_rt_sigframe __user *frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); sigset_t set; @@ -302,22 +140,18 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(&current->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; - if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) + if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; return 0; badframe: - force_sig(SIGSEGV, current); + signal_fault("bad sigreturn frame", regs, frame, 0); return 0; } @@ -388,17 +222,13 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(ptr_to_compat((void *)(current->sas_ss_sp)), - &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); err |= 
setup_sigcontext(&frame->uc.uc_mcontext, regs); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ptr_to_compat_reg(ka->sa.sa_restorer); @@ -406,31 +236,20 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = ptr_to_compat_reg(frame); regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = ptr_to_compat_reg(&frame->info); - regs->regs[2] = ptr_to_compat_reg(&frame->uc); - regs->flags |= PT_FLAGS_CALLER_SAVES; - } - - /* - * Notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. 
- */ - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; return 0; give_sigsegv: - force_sigsegv(sig, current); + signal_fault("bad setup frame", regs, frame, sig); return -EFAULT; } diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index 493a0e66d91..b608e00e7f6 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -16,41 +16,31 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> +#include <linux/irqflags.h> +#include <linux/printk.h> #include <asm/setup.h> #include <hv/hypervisor.h> static void early_hv_write(struct console *con, const char *s, unsigned n) { - hv_console_write((HV_VirtAddr) s, n); + tile_console_write(s, n); + + /* + * Convert NL to NLCR (close enough to CRNL) during early boot. + * We assume newlines are at the ends of strings, which turns out + * to be good enough for early boot console output. + */ + if (n && s[n-1] == '\n') + tile_console_write("\r", 1); } static struct console early_hv_console = { .name = "earlyhv", .write = early_hv_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; -/* Direct interface for emergencies */ -static struct console *early_console = &early_hv_console; -static int early_console_initialized; -static int early_console_complete; - -static void early_vprintk(const char *fmt, va_list ap) -{ - char buf[512]; - int n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_console->write(early_console, buf, n); -} - -void early_printk(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - early_vprintk(fmt, ap); - va_end(ap); -} - void early_panic(const char *fmt, ...) { va_list ap; @@ -58,52 +48,21 @@ void early_panic(const char *fmt, ...) 
va_start(ap, fmt); early_printk("Kernel panic - not syncing: "); early_vprintk(fmt, ap); - early_console->write(early_console, "\n", 1); + early_printk("\n"); va_end(ap); dump_stack(); hv_halt(); } -static int __initdata keep_early; - static int __init setup_early_printk(char *str) { - if (early_console_initialized) + if (early_console) return 1; - if (str != NULL && strncmp(str, "keep", 4) == 0) - keep_early = 1; - early_console = &early_hv_console; - early_console_initialized = 1; register_console(early_console); return 0; } -void __init disable_early_printk(void) -{ - early_console_complete = 1; - if (!early_console_initialized || !early_console) - return; - if (!keep_early) { - early_printk("disabling early console\n"); - unregister_console(early_console); - early_console_initialized = 0; - } else { - early_printk("keeping early console\n"); - } -} - -void warn_early_printk(void) -{ - if (early_console_complete || early_console_initialized) - return; - early_printk("\ -Machine shutting down before console output is fully initialized.\n\ -You may wish to reboot and add the option 'earlyprintk' to your\n\ -boot command line to see any diagnostic early console output.\n\ -"); -} - early_param("earlyprintk", setup_early_printk); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 431e9ae6048..3d9175992a2 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -27,33 +27,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -/* - * Implement execve(). The i386 code has a note that forking from kernel - * space results in no copy on write until the execve, so we should be - * careful not to write to the stack here. - */ -STD_ENTRY(kernel_execve) - moveli TREG_SYSCALL_NR_NAME, __NR_execve - swint1 - jrp lr - STD_ENDPROC(kernel_execve) - -/* - * We don't run this function directly, but instead copy it to a page - * we map into every user process. See vdso_setup(). 
- * - * Note that libc has a copy of this function that it uses to compare - * against the PC when a stack backtrace ends, so if this code is - * changed, the libc implementation(s) should also be updated. - */ - .pushsection .data -ENTRY(__rt_sigreturn) - moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn - swint1 - ENDPROC(__rt_sigreturn) - ENTRY(__rt_sigreturn_end) - .popsection - STD_ENTRY(dump_stack) { move r2, lr; lnk r1 } { move r4, r52; addli r1, r1, dump_stack - . } @@ -68,23 +41,10 @@ STD_ENTRY(KBacktraceIterator_init_current) jrp lr /* keep backtracer happy */ STD_ENDPROC(KBacktraceIterator_init_current) -/* - * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then - * free the old stack (passed in r0) and re-invoke cpu_idle(). - * We update sp and ksp0 simultaneously to avoid backtracer warnings. - */ -STD_ENTRY(cpu_idle_on_new_stack) - { - move sp, r1 - mtspr SPR_SYSTEM_SAVE_K_0, r2 - } - jal free_thread_info - j cpu_idle - STD_ENDPROC(cpu_idle_on_new_stack) - /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap + nop /* avoid provoking the icache prefetch with a jump */ j smp_nap /* we are not architecturally guaranteed not to exit nap */ jrp lr /* clue in the backtracer */ STD_ENDPROC(smp_nap) @@ -99,11 +59,13 @@ STD_ENTRY(smp_nap) */ STD_ENTRY(_cpu_idle) movei r1, 1 + IRQ_ENABLE_LOAD(r2, r3) mtspr INTERRUPT_CRITICAL_SECTION, r1 - IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + IRQ_ENABLE_APPLY(r2, r3) /* unmask, but still with ICS set */ mtspr INTERRUPT_CRITICAL_SECTION, zero .global _cpu_idle_nap _cpu_idle_nap: nap + nop /* avoid provoking the icache prefetch with a jump */ jrp lr STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c new file mode 100644 index 00000000000..8d52d83cc51 --- /dev/null +++ b/arch/tile/kernel/ftrace.c @@ -0,0 +1,244 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx specific ftrace support + */ + +#include <linux/ftrace.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/sections.h> + +#include <arch/opcode.h> + +#ifdef CONFIG_DYNAMIC_FTRACE + +static inline tilegx_bundle_bits NOP(void) +{ + return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) | + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0) | + create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) | + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | + create_Opcode_X1(RRR_0_OPCODE_X1); +} + +static int machine_stopped __read_mostly; + +int ftrace_arch_code_modify_prepare(void) +{ + machine_stopped = 1; + return 0; +} + +int ftrace_arch_code_modify_post_process(void) +{ + flush_icache_range(0, CHIP_L1I_CACHE_SIZE()); + machine_stopped = 0; + return 0; +} + +/* + * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic + * tracer just add one cycle overhead to every kernel function when disabled. 
+ */ +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) +{ + tilegx_bundle_bits opcode_x0, opcode_x1; + long pcrel_by_instr = (addr - pc) >> TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES; + + if (link) { + /* opcode: jal addr */ + opcode_x1 = + create_Opcode_X1(JUMP_OPCODE_X1) | + create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | + create_JumpOff_X1(pcrel_by_instr); + } else { + /* opcode: j addr */ + opcode_x1 = + create_Opcode_X1(JUMP_OPCODE_X1) | + create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | + create_JumpOff_X1(pcrel_by_instr); + } + + if (addr == FTRACE_ADDR) { + /* opcode: or r10, lr, zero */ + opcode_x0 = + create_Dest_X0(10) | + create_SrcA_X0(TREG_LR) | + create_SrcB_X0(TREG_ZERO) | + create_RRROpcodeExtension_X0(OR_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0); + } else { + /* opcode: fnop */ + opcode_x0 = + create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) | + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | + create_Opcode_X0(RRR_0_OPCODE_X0); + } + + return opcode_x1 | opcode_x0; +} + +static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) +{ + return NOP(); +} + +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + +static int ftrace_modify_code(unsigned long pc, unsigned long old, + unsigned long new) +{ + unsigned long pc_wr; + + /* Check if the address is in kernel text space and module space. */ + if (!kernel_text_address(pc)) + return -EINVAL; + + /* Operate on writable kernel text mapping. 
*/ + pc_wr = pc - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)pc_wr, &new, MCOUNT_INSN_SIZE)) + return -EPERM; + + smp_wmb(); + + if (!machine_stopped && num_online_cpus() > 1) + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + + return 0; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc, old; + unsigned long new; + int ret; + + pc = (unsigned long)&ftrace_call; + memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(pc, (unsigned long)func); + + ret = ftrace_modify_code(pc, old, new); + + return ret; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long new, old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(rec); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned long old; + unsigned long new; + int ret; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(rec); + ret = ftrace_modify_code(ip, old, new); + + return ret; +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long 
ftrace_graph_call; + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = NOP(); + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index e910530436e..531f4c36535 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -33,50 +33,157 @@ /* - * This data structure tracks the rectangle data, etc., associated - * one-to-one with a "struct file *" from opening HARDWALL_FILE. + * Implement a per-cpu "hardwall" resource class such as UDN or IPI. + * We use "hardwall" nomenclature throughout for historical reasons. + * The lock here controls access to the list data structure as well as + * to the items on the list. 
+ */ +struct hardwall_type { + int index; + int is_xdn; + int is_idn; + int disabled; + const char *name; + struct list_head list; + spinlock_t lock; + struct proc_dir_entry *proc_dir; +}; + +enum hardwall_index { + HARDWALL_UDN = 0, +#ifndef __tilepro__ + HARDWALL_IDN = 1, + HARDWALL_IPI = 2, +#endif + _HARDWALL_TYPES +}; + +static struct hardwall_type hardwall_types[] = { + { /* user-space access to UDN */ + 0, + 1, + 0, + 0, + "udn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_UDN].lock), + NULL + }, +#ifndef __tilepro__ + { /* user-space access to IDN */ + 1, + 1, + 1, + 1, /* disabled pending hypervisor support */ + "idn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IDN].lock), + NULL + }, + { /* access to user-space IPI */ + 2, + 0, + 0, + 0, + "ipi", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list), + __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IPI].lock), + NULL + }, +#endif +}; + +/* + * This data structure tracks the cpu data, etc., associated + * one-to-one with a "struct file *" from opening a hardwall device file. * Note that the file's private data points back to this structure. */ struct hardwall_info { - struct list_head list; /* "rectangles" list */ + struct list_head list; /* for hardwall_types.list */ struct list_head task_head; /* head of tasks in this hardwall */ + struct hardwall_type *type; /* type of this resource */ + struct cpumask cpumask; /* cpus reserved */ + int id; /* integer id for this hardwall */ + int teardown_in_progress; /* are we tearing this one down? */ + + /* Remaining fields only valid for user-network resources. */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ - int teardown_in_progress; /* are we tearing this one down? 
*/ +#if CHIP_HAS_REV1_XDN() + atomic_t xdn_pending_count; /* cores in phase 1 of drain */ +#endif }; -/* Currently allocated hardwall rectangles */ -static LIST_HEAD(rectangles); -/* - * Guard changes to the hardwall data structures. - * This could be finer grained (e.g. one lock for the list of hardwall - * rectangles, then separate embedded locks for each one's list of tasks), - * but there are subtle correctness issues when trying to start with - * a task's "hardwall" pointer and lock the correct rectangle's embedded - * lock in the presence of a simultaneous deactivation, so it seems - * easier to have a single lock, given that none of these data - * structures are touched very frequently during normal operation. - */ -static DEFINE_SPINLOCK(hardwall_lock); +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *); +static void hardwall_remove_proc(struct hardwall_info *); /* Allow disabling UDN access. */ -static int udn_disabled; static int __init noudn(char *str) { pr_info("User-space UDN access is disabled\n"); - udn_disabled = 1; + hardwall_types[HARDWALL_UDN].disabled = 1; return 0; } early_param("noudn", noudn); +#ifndef __tilepro__ +/* Allow disabling IDN access. */ +static int __init noidn(char *str) +{ + pr_info("User-space IDN access is disabled\n"); + hardwall_types[HARDWALL_IDN].disabled = 1; + return 0; +} +early_param("noidn", noidn); + +/* Allow disabling IPI access. 
*/ +static int __init noipi(char *str) +{ + pr_info("User-space IPI access is disabled\n"); + hardwall_types[HARDWALL_IPI].disabled = 1; + return 0; +} +early_param("noipi", noipi); +#endif + /* - * Low-level primitives + * Low-level primitives for UDN/IDN */ +#ifdef __tilepro__ +#define mtspr_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_UDN_##name, (val)); } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_MPL_UDN_##name, (val)); } while (0) +#define mfspr_XDN(hwt, name) \ + ((void)(hwt), __insn_mfspr(SPR_UDN_##name)) +#else +#define mtspr_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_UDN_##name, (val)); \ + } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_MPL_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_MPL_UDN_##name, (val)); \ + } while (0) +#define mfspr_XDN(hwt, name) \ + ((hwt)->is_idn ? __insn_mfspr(SPR_IDN_##name) : __insn_mfspr(SPR_UDN_##name)) +#endif + /* Set a CPU bit if the CPU is online. */ #define cpu_online_set(cpu, dst) do { \ if (cpu_online(cpu)) \ @@ -92,7 +199,7 @@ static int contains(struct hardwall_info *r, int x, int y) } /* Compute the rectangle parameters and validate the cpumask. */ -static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) +static int check_rectangle(struct hardwall_info *r, struct cpumask *mask) { int x, y, cpu, ulhc, lrhc; @@ -117,7 +224,7 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return -EINVAL; /* - * Note that offline cpus can't be drained when this UDN + * Note that offline cpus can't be drained when this user network * rectangle eventually closes. 
We used to detect this * situation and print a warning, but it annoyed users and * they ignored it anyway, so now we just return without a @@ -126,16 +233,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return 0; } -/* Do the two given rectangles overlap on any cpu? */ -static int overlaps(struct hardwall_info *a, struct hardwall_info *b) -{ - return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */ - b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */ - a->ulhc_y + a->height > b->ulhc_y && /* A not above */ - b->ulhc_y + b->height > a->ulhc_y; /* B not above */ -} - - /* * Hardware management of hardwall setup, teardown, trapping, * and enabling/disabling PL0 access to the networks. @@ -146,26 +243,38 @@ enum direction_protect { N_PROTECT = (1 << 0), E_PROTECT = (1 << 1), S_PROTECT = (1 << 2), - W_PROTECT = (1 << 3) + W_PROTECT = (1 << 3), + C_PROTECT = (1 << 4), }; -static void enable_firewall_interrupts(void) +static inline int xdn_which_interrupt(struct hardwall_type *hwt) { - arch_local_irq_unmask_now(INT_UDN_FIREWALL); +#ifndef __tilepro__ + if (hwt->is_idn) + return INT_IDN_FIREWALL; +#endif + return INT_UDN_FIREWALL; } -static void disable_firewall_interrupts(void) +static void enable_firewall_interrupts(struct hardwall_type *hwt) { - arch_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(xdn_which_interrupt(hwt)); +} + +static void disable_firewall_interrupts(struct hardwall_type *hwt) +{ + arch_local_irq_mask_now(xdn_which_interrupt(hwt)); } /* Set up hardwall on this cpu based on the passed hardwall_info. 
*/ -static void hardwall_setup_ipi_func(void *info) +static void hardwall_setup_func(void *info) { struct hardwall_info *r = info; - int cpu = smp_processor_id(); - int x = cpu % smp_width; - int y = cpu / smp_width; + struct hardwall_type *hwt = r->type; + + int cpu = smp_processor_id(); /* on_each_cpu disables preemption */ + int x = cpu_x(cpu); + int y = cpu_y(cpu); int bits = 0; if (x == r->ulhc_x) bits |= W_PROTECT; @@ -176,13 +285,12 @@ static void hardwall_setup_ipi_func(void *info) if (y == r->ulhc_y + r->height - 1) bits |= S_PROTECT; BUG_ON(bits == 0); - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); - enable_firewall_interrupts(); - + mtspr_XDN(hwt, DIRECTION_PROTECT, bits); + enable_firewall_interrupts(hwt); } /* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ -static void hardwall_setup(struct hardwall_info *r) +static void hardwall_protect_rectangle(struct hardwall_info *r) { int x, y, cpu, delta; struct cpumask rect_cpus; @@ -206,37 +314,50 @@ static void hardwall_setup(struct hardwall_info *r) } /* Then tell all the cpus to set up their protection SPR */ - on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); + on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1); } +/* Entered from INT_xDN_FIREWALL interrupt vector with irqs disabled. */ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) { struct hardwall_info *rect; + struct hardwall_type *hwt; struct task_struct *p; struct siginfo info; - int x, y; int cpu = smp_processor_id(); int found_processes; - unsigned long flags; - struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + /* Figure out which network trapped. */ + switch (fault_num) { +#ifndef __tilepro__ + case INT_IDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_IDN]; + break; +#endif + case INT_UDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_UDN]; + break; + default: + BUG(); + } + BUG_ON(hwt->disabled); + /* This tile trapped a network access; find the rectangle. 
*/ - x = cpu % smp_width; - y = cpu / smp_width; - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(rect, &rectangles, list) { - if (contains(rect, x, y)) + spin_lock(&hwt->lock); + list_for_each_entry(rect, &hwt->list, list) { + if (cpumask_test_cpu(cpu, &rect->cpumask)) break; } /* * It shouldn't be possible not to find this cpu on the * rectangle list, since only cpus in rectangles get hardwalled. - * The hardwall is only removed after the UDN is drained. + * The hardwall is only removed after the user network is drained. */ - BUG_ON(&rect->list == &rectangles); + BUG_ON(&rect->list == &hwt->list); /* * If we already started teardown on this hardwall, don't worry; @@ -244,43 +365,43 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * to quiesce. */ if (rect->teardown_in_progress) { - pr_notice("cpu %d: detected hardwall violation %#lx" + pr_notice("cpu %d: detected %s hardwall violation %#lx" " while teardown already in progress\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + cpu, hwt->name, + (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); goto done; } /* * Kill off any process that is activated in this rectangle. * We bypass security to deliver the signal, since it must be - * one of the activated processes that generated the UDN + * one of the activated processes that generated the user network * message that caused this trap, and all the activated * processes shared a single open file so are pretty tightly * bound together from a security point of view to begin with. */ rect->teardown_in_progress = 1; wmb(); /* Ensure visibility of rectangle before notifying processes. 
*/ - pr_notice("cpu %d: detected hardwall violation %#lx...\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + pr_notice("cpu %d: detected %s hardwall violation %#lx...\n", + cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_HARDWALL; found_processes = 0; - list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { - BUG_ON(p->thread.hardwall != rect); - if (p->sighand) { + list_for_each_entry(p, &rect->task_head, + thread.hardwall[hwt->index].list) { + BUG_ON(p->thread.hardwall[hwt->index].info != rect); + if (!(p->flags & PF_EXITING)) { found_processes = 1; pr_notice("hardwall: killing %d\n", p->pid); - spin_lock(&p->sighand->siglock); - __group_send_sig_info(info.si_signo, &info, p); - spin_unlock(&p->sighand->siglock); + do_send_sig_info(info.si_signo, &info, p, false); } } if (!found_processes) pr_notice("hardwall: no associated processes!\n"); done: - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock(&hwt->lock); /* * We have to disable firewall interrupts now, or else when we @@ -289,48 +410,87 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * haven't yet drained the network, and that would allow packets * to cross out of the hardwall region. */ - disable_firewall_interrupts(); + disable_firewall_interrupts(hwt); irq_exit(); set_irq_regs(old_regs); } -/* Allow access from user space to the UDN. */ -void grant_network_mpls(void) +/* Allow access from user space to the user network. 
*/ +void grant_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_0, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_0, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_0, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_0, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_0, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_0, 1); + mtspr_MPL_XDN(hwt, CA_SET_0, 1); #endif } -/* Deny access from user space to the UDN. */ -void restrict_network_mpls(void) +/* Deny access from user space to the user network. */ +void restrict_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_1, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_1, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_1, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_1, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_1, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_1, 1); + mtspr_MPL_XDN(hwt, CA_SET_1, 1); #endif } +/* Restrict or deny as necessary for the task we're switching to. 
*/ +void hardwall_switch_tasks(struct task_struct *prev, + struct task_struct *next) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + if (prev->thread.hardwall[i].info != NULL) { + if (next->thread.hardwall[i].info == NULL) + restrict_hardwall_mpls(&hardwall_types[i]); + } else if (next->thread.hardwall[i].info != NULL) { + grant_hardwall_mpls(&hardwall_types[i]); + } + } +} + +/* Does this task have the right to IPI the given cpu? */ +int hardwall_ipi_valid(int cpu) +{ +#ifdef __tilegx__ + struct hardwall_info *info = + current->thread.hardwall[HARDWALL_IPI].info; + return info && cpumask_test_cpu(cpu, &info->cpumask); +#else + return 0; +#endif +} /* - * Code to create, activate, deactivate, and destroy hardwall rectangles. + * Code to create, activate, deactivate, and destroy hardwall resources. */ -/* Create a hardwall for the given rectangle */ -static struct hardwall_info *hardwall_create( - size_t size, const unsigned char __user *bits) +/* Create a hardwall for the given resource */ +static struct hardwall_info *hardwall_create(struct hardwall_type *hwt, + size_t size, + const unsigned char __user *bits) { - struct hardwall_info *iter, *rect; + struct hardwall_info *iter, *info; struct cpumask mask; unsigned long flags; int rc; @@ -361,52 +521,70 @@ static struct hardwall_info *hardwall_create( } } - /* Allocate a new rectangle optimistically. */ - rect = kmalloc(sizeof(struct hardwall_info), + /* Allocate a new hardwall_info optimistically. */ + info = kmalloc(sizeof(struct hardwall_info), GFP_KERNEL | __GFP_ZERO); - if (rect == NULL) + if (info == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&rect->task_head); + INIT_LIST_HEAD(&info->task_head); + info->type = hwt; /* Compute the rectangle size and validate that it's plausible. 
*/ - rc = setup_rectangle(rect, &mask); - if (rc != 0) { - kfree(rect); - return ERR_PTR(rc); + cpumask_copy(&info->cpumask, &mask); + info->id = find_first_bit(cpumask_bits(&mask), nr_cpumask_bits); + if (hwt->is_xdn) { + rc = check_rectangle(info, &mask); + if (rc != 0) { + kfree(info); + return ERR_PTR(rc); + } } + /* + * Eliminate cpus that are not part of this Linux client. + * Note that this allows for configurations that we might not want to + * support, such as one client on every even cpu, another client on + * every odd cpu. + */ + cpumask_and(&info->cpumask, &info->cpumask, cpu_online_mask); + /* Confirm it doesn't overlap and add it to the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(iter, &rectangles, list) { - if (overlaps(iter, rect)) { - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(iter, &hwt->list, list) { + if (cpumask_intersects(&iter->cpumask, &info->cpumask)) { + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); return ERR_PTR(-EBUSY); } } - list_add_tail(&rect->list, &rectangles); - spin_unlock_irqrestore(&hardwall_lock, flags); + list_add_tail(&info->list, &hwt->list); + spin_unlock_irqrestore(&hwt->lock, flags); /* Set up appropriate hardwalling on all affected cpus. */ - hardwall_setup(rect); + if (hwt->is_xdn) + hardwall_protect_rectangle(info); - return rect; + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(info); + + return info; } /* Activate a given hardwall on this cpu for this process. */ -static int hardwall_activate(struct hardwall_info *rect) +static int hardwall_activate(struct hardwall_info *info) { - int cpu, x, y; + int cpu; unsigned long flags; struct task_struct *p = current; struct thread_struct *ts = &p->thread; + struct hardwall_type *hwt; - /* Require a rectangle. */ - if (rect == NULL) + /* Require a hardwall. 
*/ + if (info == NULL) return -ENODATA; - /* Not allowed to activate a rectangle that is being torn down. */ - if (rect->teardown_in_progress) + /* Not allowed to activate a hardwall that is being torn down. */ + if (info->teardown_in_progress) return -EINVAL; /* @@ -416,78 +594,87 @@ static int hardwall_activate(struct hardwall_info *rect) if (cpumask_weight(&p->cpus_allowed) != 1) return -EPERM; - /* Make sure we are bound to a cpu in this rectangle. */ + /* Make sure we are bound to a cpu assigned to this resource. */ cpu = smp_processor_id(); BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); - x = cpu_x(cpu); - y = cpu_y(cpu); - if (!contains(rect, x, y)) + if (!cpumask_test_cpu(cpu, &info->cpumask)) return -EINVAL; /* If we are already bound to this hardwall, it's a no-op. */ - if (ts->hardwall) { - BUG_ON(ts->hardwall != rect); + hwt = info->type; + if (ts->hardwall[hwt->index].info) { + BUG_ON(ts->hardwall[hwt->index].info != info); return 0; } - /* Success! This process gets to use the user networks on this cpu. */ - ts->hardwall = rect; - spin_lock_irqsave(&hardwall_lock, flags); - list_add(&ts->hardwall_list, &rect->task_head); - spin_unlock_irqrestore(&hardwall_lock, flags); - grant_network_mpls(); - printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", - p->pid, p->comm, cpu); + /* Success! This process gets to use the resource on this cpu. */ + ts->hardwall[hwt->index].info = info; + spin_lock_irqsave(&hwt->lock, flags); + list_add(&ts->hardwall[hwt->index].list, &info->task_head); + spin_unlock_irqrestore(&hwt->lock, flags); + grant_hardwall_mpls(hwt); + printk(KERN_DEBUG "Pid %d (%s) activated for %s hardwall: cpu %d\n", + p->pid, p->comm, hwt->name, cpu); return 0; } /* - * Deactivate a task's hardwall. Must hold hardwall_lock. - * This method may be called from free_task(), so we don't want to + * Deactivate a task's hardwall. Must hold lock for hardwall_type. 
+ * This method may be called from exit_thread(), so we don't want to * rely on too many fields of struct task_struct still being valid. * We assume the cpus_allowed, pid, and comm fields are still valid. */ -static void _hardwall_deactivate(struct task_struct *task) +static void _hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { struct thread_struct *ts = &task->thread; if (cpumask_weight(&task->cpus_allowed) != 1) { - pr_err("pid %d (%s) releasing networks with" + pr_err("pid %d (%s) releasing %s hardwall with" " an affinity mask containing %d cpus!\n", - task->pid, task->comm, + task->pid, task->comm, hwt->name, cpumask_weight(&task->cpus_allowed)); BUG(); } - BUG_ON(ts->hardwall == NULL); - ts->hardwall = NULL; - list_del(&ts->hardwall_list); + BUG_ON(ts->hardwall[hwt->index].info == NULL); + ts->hardwall[hwt->index].info = NULL; + list_del(&ts->hardwall[hwt->index].list); if (task == current) - restrict_network_mpls(); + restrict_hardwall_mpls(hwt); } /* Deactivate a task's hardwall. */ -int hardwall_deactivate(struct task_struct *task) +static int hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { unsigned long flags; int activated; - spin_lock_irqsave(&hardwall_lock, flags); - activated = (task->thread.hardwall != NULL); + spin_lock_irqsave(&hwt->lock, flags); + activated = (task->thread.hardwall[hwt->index].info != NULL); if (activated) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); if (!activated) return -EINVAL; - printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", - task->pid, task->comm, smp_processor_id()); + printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n", + task->pid, task->comm, hwt->name, raw_smp_processor_id()); return 0; } -/* Stop a UDN switch before draining the network. 
*/ -static void stop_udn_switch(void *ignored) +void hardwall_deactivate_all(struct task_struct *task) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) + if (task->thread.hardwall[i].info) + hardwall_deactivate(&hardwall_types[i], task); +} + +/* Stop the switch before draining the network. */ +static void stop_xdn_switch(void *arg) { #if !CHIP_HAS_REV1_XDN() /* Freeze the switch and the demux. */ @@ -495,13 +682,71 @@ static void stop_udn_switch(void *ignored) SPR_UDN_SP_FREEZE__SP_FRZ_MASK | SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); +#else + /* + * Drop all packets bound for the core or off the edge. + * We rely on the normal hardwall protection setup code + * to have set the low four bits to trigger firewall interrupts, + * and shift those bits up to trigger "drop on send" semantics, + * plus adding "drop on send to core" for all switches. + * In practice it seems the switches latch the DIRECTION_PROTECT + * SPR so they won't start dropping if they're already + * delivering the last message to the core, but it doesn't + * hurt to enable it here. + */ + struct hardwall_type *hwt = arg; + unsigned long protect = mfspr_XDN(hwt, DIRECTION_PROTECT); + mtspr_XDN(hwt, DIRECTION_PROTECT, (protect | C_PROTECT) << 5); #endif } +static void empty_xdn_demuxes(struct hardwall_type *hwt) +{ +#ifndef __tilepro__ + if (hwt->is_idn) { + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_idn0_receive(); + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_idn1_receive(); + return; + } +#endif + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_udn0_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_udn1_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) + (void) __tile_udn2_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) + (void) __tile_udn3_receive(); +} + /* Drain all the state from a stopped switch. 
*/ -static void drain_udn_switch(void *ignored) +static void drain_xdn_switch(void *arg) { -#if !CHIP_HAS_REV1_XDN() + struct hardwall_info *info = arg; + struct hardwall_type *hwt = info->type; + +#if CHIP_HAS_REV1_XDN() + /* + * The switches have been configured to drop any messages + * destined for cores (or off the edge of the rectangle). + * But the current message may continue to be delivered, + * so we wait until all the cores have finished any pending + * messages before we stop draining. + */ + int pending = mfspr_XDN(hwt, PENDING); + while (pending--) { + empty_xdn_demuxes(hwt); + if (hwt->is_idn) + __tile_idn_send(0); + else + __tile_udn_send(0); + } + atomic_dec(&info->xdn_pending_count); + while (atomic_read(&info->xdn_pending_count)) + empty_xdn_demuxes(hwt); +#else int i; int from_tile_words, ca_count; @@ -521,15 +766,7 @@ static void drain_udn_switch(void *ignored) (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); /* Empty out demuxes. */ - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) - (void) __tile_udn0_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) - (void) __tile_udn1_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) - (void) __tile_udn2_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) - (void) __tile_udn3_receive(); - BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0); + empty_xdn_demuxes(hwt); /* Empty out catch all. */ ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); @@ -551,21 +788,25 @@ static void drain_udn_switch(void *ignored) #endif } -/* Reset random UDN state registers at boot up and during hardwall teardown. */ -void reset_network_state(void) +/* Reset random XDN state registers at boot up and during hardwall teardown. 
*/ +static void reset_xdn_network_state(struct hardwall_type *hwt) { -#if !CHIP_HAS_REV1_XDN() - /* Reset UDN coordinates to their standard value */ - unsigned int cpu = smp_processor_id(); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; -#endif - - if (udn_disabled) + if (hwt->disabled) return; + /* Clear out other random registers so we have a clean slate. */ + mtspr_XDN(hwt, DIRECTION_PROTECT, 0); + mtspr_XDN(hwt, AVAIL_EN, 0); + mtspr_XDN(hwt, DEADLOCK_TIMEOUT, 0); + #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + /* Reset UDN coordinates to their standard value */ + { + unsigned int cpu = smp_processor_id(); + unsigned int x = cpu_x(cpu); + unsigned int y = cpu_y(cpu); + __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + } /* Set demux tags to predefined values and enable them. */ __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); @@ -573,56 +814,50 @@ void reset_network_state(void) __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); -#endif - /* Clear out other random registers so we have a clean slate. */ - __insn_mtspr(SPR_UDN_AVAIL_EN, 0); - __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0); -#if !CHIP_HAS_REV1_XDN() + /* Set other rev0 random registers to a clean state. */ __insn_mtspr(SPR_UDN_REFILL_EN, 0); __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); -#endif /* Start the switch and demux. */ -#if !CHIP_HAS_REV1_XDN() __insn_mtspr(SPR_UDN_SP_FREEZE, 0); #endif } -/* Restart a UDN switch after draining. */ -static void restart_udn_switch(void *ignored) +void reset_network_state(void) { - reset_network_state(); - - /* Disable firewall interrupts. 
*/ - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); - disable_firewall_interrupts(); + reset_xdn_network_state(&hardwall_types[HARDWALL_UDN]); +#ifndef __tilepro__ + reset_xdn_network_state(&hardwall_types[HARDWALL_IDN]); +#endif } -/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ -static void fill_mask(struct hardwall_info *r, struct cpumask *result) +/* Restart an XDN switch after draining. */ +static void restart_xdn_switch(void *arg) { - int x, y, cpu; + struct hardwall_type *hwt = arg; - cpumask_clear(result); +#if CHIP_HAS_REV1_XDN() + /* One last drain step to avoid races with injection and draining. */ + empty_xdn_demuxes(hwt); +#endif - cpu = r->ulhc_y * smp_width + r->ulhc_x; - for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { - for (x = 0; x < r->width; ++x, ++cpu) - cpu_online_set(cpu, result); - } + reset_xdn_network_state(hwt); + + /* Disable firewall interrupts. */ + disable_firewall_interrupts(hwt); } /* Last reference to a hardwall is gone, so clear the network. */ -static void hardwall_destroy(struct hardwall_info *rect) +static void hardwall_destroy(struct hardwall_info *info) { struct task_struct *task; + struct hardwall_type *hwt; unsigned long flags; - struct cpumask mask; - /* Make sure this file actually represents a rectangle. */ - if (rect == NULL) + /* Make sure this file actually represents a hardwall. */ + if (info == NULL) return; /* @@ -632,57 +867,110 @@ static void hardwall_destroy(struct hardwall_info *rect) * deactivate any remaining tasks before freeing the * hardwall_info object itself. 
*/ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(task, &rect->task_head, thread.hardwall_list) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); + hwt = info->type; + info->teardown_in_progress = 1; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(task, &info->task_head, + thread.hardwall[hwt->index].list) + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); + + if (hwt->is_xdn) { + /* Configure the switches for draining the user network. */ + printk(KERN_DEBUG + "Clearing %s hardwall rectangle %dx%d %d,%d\n", + hwt->name, info->width, info->height, + info->ulhc_x, info->ulhc_y); + on_each_cpu_mask(&info->cpumask, stop_xdn_switch, hwt, 1); + + /* Drain the network. */ +#if CHIP_HAS_REV1_XDN() + atomic_set(&info->xdn_pending_count, + cpumask_weight(&info->cpumask)); + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 0); +#else + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 1); +#endif - /* Drain the UDN. */ - printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", - rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); - fill_mask(rect, &mask); - on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); - on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); + /* Restart switch and disable firewall. */ + on_each_cpu_mask(&info->cpumask, restart_xdn_switch, hwt, 1); + } - /* Restart switch and disable firewall. */ - on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(info); - /* Now free the rectangle from the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - BUG_ON(!list_empty(&rect->task_head)); - list_del(&rect->list); - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + /* Now free the hardwall from the list. 
*/ + spin_lock_irqsave(&hwt->lock, flags); + BUG_ON(!list_empty(&info->task_head)); + list_del(&info->list); + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); } -/* - * Dump hardwall state via /proc; initialized in arch/tile/sys/proc.c. - */ -int proc_tile_hardwall_show(struct seq_file *sf, void *v) +static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *r; + struct hardwall_info *info = sf->private; + char buf[256]; - if (udn_disabled) { - seq_printf(sf, "%dx%d 0,0 pids:\n", smp_width, smp_height); - return 0; + int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); + return 0; +} + +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *info) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", info->id); + proc_create_data(buf, 0444, info->type->proc_dir, + &hardwall_proc_fops, info); +} + +static void hardwall_remove_proc(struct hardwall_info *info) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", info->id); + remove_proc_entry(buf, info->type->proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + int i; + int n = 0; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_info *info = task->thread.hardwall[i].info; + if (info) + n += sprintf(&buffer[n], "%s: %d\n", + info->type->name, info->id); } + return n; +} - spin_lock_irq(&hardwall_lock); - list_for_each_entry(r, &rectangles, list) { - struct task_struct *p; - seq_printf(sf, "%dx%d %d,%d pids:", - r->width, r->height, r->ulhc_x, r->ulhc_y); - list_for_each_entry(p, &r->task_head, thread.hardwall_list) { - unsigned int cpu = cpumask_first(&p->cpus_allowed); - unsigned 
int x = cpu % smp_width; - unsigned int y = cpu / smp_width; - seq_printf(sf, " %d@%d,%d", p->pid, x, y); - } - seq_printf(sf, "\n"); +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_type *hwt = &hardwall_types[i]; + if (hwt->disabled) + continue; + if (hardwall_proc_dir == NULL) + hardwall_proc_dir = proc_mkdir("hardwall", root); + hwt->proc_dir = proc_mkdir(hwt->name, hardwall_proc_dir); } - spin_unlock_irq(&hardwall_lock); - return 0; } @@ -692,31 +980,45 @@ int proc_tile_hardwall_show(struct seq_file *sf, void *v) static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; + int minor = iminor(file->f_mapping->host); + struct hardwall_type* hwt; if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) return -EINVAL; + BUILD_BUG_ON(HARDWALL_TYPES != _HARDWALL_TYPES); + BUILD_BUG_ON(HARDWALL_TYPES != + sizeof(hardwall_types)/sizeof(hardwall_types[0])); + + if (minor < 0 || minor >= HARDWALL_TYPES) + return -EINVAL; + hwt = &hardwall_types[minor]; + WARN_ON(info && hwt != info->type); + switch (_IOC_NR(a)) { case _HARDWALL_CREATE: - if (udn_disabled) + if (hwt->disabled) return -ENOSYS; - if (rect != NULL) + if (info != NULL) return -EALREADY; - rect = hardwall_create(_IOC_SIZE(a), - (const unsigned char __user *)b); - if (IS_ERR(rect)) - return PTR_ERR(rect); - file->private_data = rect; + info = hardwall_create(hwt, _IOC_SIZE(a), + (const unsigned char __user *)b); + if (IS_ERR(info)) + return PTR_ERR(info); + file->private_data = info; return 0; case _HARDWALL_ACTIVATE: - return hardwall_activate(rect); + return hardwall_activate(info); case _HARDWALL_DEACTIVATE: - if (current->thread.hardwall != rect) + if (current->thread.hardwall[hwt->index].info != info) return -EINVAL; - return hardwall_deactivate(current); + return hardwall_deactivate(hwt, current); + + case 
_HARDWALL_GET_ID: + return info ? info->id : -EINVAL; default: return -EINVAL; @@ -735,26 +1037,28 @@ static long hardwall_compat_ioctl(struct file *file, /* The user process closed the file; revoke access to user networks. */ static int hardwall_flush(struct file *file, fl_owner_t owner) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; struct task_struct *task, *tmp; unsigned long flags; - if (rect) { + if (info) { /* * NOTE: if multiple threads are activated on this hardwall * file, the other threads will continue having access to the - * UDN until they are context-switched out and back in again. + * user network until they are context-switched out and back + * in again. * * NOTE: A NULL files pointer means the task is being torn * down, so in that case we also deactivate it. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry_safe(task, tmp, &rect->task_head, - thread.hardwall_list) { + struct hardwall_type *hwt = info->type; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry_safe(task, tmp, &info->task_head, + thread.hardwall[hwt->index].list) { if (task->files == owner || task->files == NULL) - _hardwall_deactivate(task); + _hardwall_deactivate(hwt, task); } - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock_irqrestore(&hwt->lock, flags); } return 0; @@ -784,11 +1088,11 @@ static int __init dev_hardwall_init(void) int rc; dev_t dev; - rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); + rc = alloc_chrdev_region(&dev, 0, HARDWALL_TYPES, "hardwall"); if (rc < 0) return rc; cdev_init(&hardwall_dev, &dev_hardwall_fops); - rc = cdev_add(&hardwall_dev, dev, 1); + rc = cdev_add(&hardwall_dev, dev, HARDWALL_TYPES); if (rc < 0) return rc; diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 1a39b7c1c87..8d5b40ff292 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -38,13 +38,13 @@ ENTRY(_start) movei r2, TILE_CHIP_REV } { - 
moveli r0, _HV_VERSION - jal hv_init + moveli r0, _HV_VERSION_OLD_HV_INIT + jal _hv_init } /* Get a reasonable default ASID in r0 */ { move r0, zero - jal hv_inquire_asid + jal _hv_inquire_asid } /* Install the default page table */ { @@ -64,21 +64,21 @@ ENTRY(_start) auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET) } { - inv r6 + finv r6 move r1, zero /* high 32 bits of CPA is zero */ } { moveli lr, lo16(1f) - move r5, zero + moveli r5, CTX_PAGE_FLAG } { auli lr, lr, ha16(1f) - j hv_install_context + j _hv_install_context } 1: /* Get our processor number and save it away in SAVE_K_0. */ - jal hv_inquire_topology + jal _hv_inquire_topology mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ @@ -86,7 +86,7 @@ ENTRY(_start) /* * Load up our per-cpu offset. When the first (master) tile * boots, this value is still zero, so we will load boot_pc - * with start_kernel, and boot_sp with init_stack + THREAD_SIZE. + * with start_kernel, and boot_sp at the top of init_stack. * The master tile initializes the per-cpu offset array, so that * when subsequent (secondary) tiles boot, they will instead load * from their per-cpu versions of boot_sp and boot_pc. 
@@ -126,7 +126,6 @@ ENTRY(_start) lw sp, r1 or r4, sp, r4 mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ - addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ jr r0 @@ -141,11 +140,11 @@ ENTRY(empty_zero_page) .macro PTE va, cpa, bits1, no_org=0 .ifeq \no_org - .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(\va) * HV_PTE_SIZE .endif .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) - .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) + .word (\bits1) | (HV_CPA_TO_PTFN(\cpa) << (HV_PTE_INDEX_PTFN - 32)) .endm __PAGE_ALIGNED_DATA @@ -163,10 +162,10 @@ ENTRY(swapper_pg_dir) .set addr, addr + PGDIR_SIZE .endr - /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ - PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ + /* The true text VAs are mapped as VA = PA + MEM_SV_START */ + PTE MEM_SV_START, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) - .org swapper_pg_dir + HV_L1_SIZE + .org swapper_pg_dir + PGDIR_SIZE END(swapper_pg_dir) /* diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S new file mode 100644 index 00000000000..bd0e12f283f --- /dev/null +++ b/arch/tile/kernel/head_64.S @@ -0,0 +1,279 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE startup code. 
+ */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> +#include <asm/processor.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> +#include <arch/chip.h> +#include <arch/spr_def.h> + +/* Extract two 32-bit values that were read into one register. */ +#ifdef __BIG_ENDIAN__ +#define GET_FIRST_INT(rd, rs) shrsi rd, rs, 32 +#define GET_SECOND_INT(rd, rs) addxi rd, rs, 0 +#else +#define GET_FIRST_INT(rd, rs) addxi rd, rs, 0 +#define GET_SECOND_INT(rd, rs) shrsi rd, rs, 32 +#endif + +/* + * This module contains the entry code for kernel images. It performs the + * minimal setup needed to call the generic C routines. + */ + + __HEAD +ENTRY(_start) + /* Notify the hypervisor of what version of the API we want */ + { +#if KERNEL_PL == 1 && _HV_VERSION == 13 + /* Support older hypervisors by asking for API version 12. */ + movei r0, _HV_VERSION_OLD_HV_INIT +#else + movei r0, _HV_VERSION +#endif + movei r1, TILE_CHIP + } + { + movei r2, TILE_CHIP_REV + movei r3, KERNEL_PL + } + jal _hv_init + /* Get a reasonable default ASID in r0 */ + { + move r0, zero + jal _hv_inquire_asid + } + + /* + * Install the default page table. The relocation required to + * statically define the table is a bit too complex, so we have + * to plug in the pointer from the L0 to the L1 table by hand. + * We only do this on the first cpu to boot, though, since the + * other CPUs should see a properly-constructed page table. + */ + { + GET_FIRST_INT(r2, r0) /* ASID for hv_install_context */ + moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET) + } + { + shl16insli r4, r4, hw0(swapper_pgprot - PAGE_OFFSET) + } + { + ld r1, r4 /* access_pte for hv_install_context */ + } + { + moveli r0, hw1_last(.Lsv_data_pmd - PAGE_OFFSET) + moveli r6, hw1_last(temp_data_pmd - PAGE_OFFSET) + } + { + /* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. 
*/ + bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL + finv r4 + } + bnez r7, .Lno_write + { + shl16insli r0, r0, hw0(.Lsv_data_pmd - PAGE_OFFSET) + shl16insli r6, r6, hw0(temp_data_pmd - PAGE_OFFSET) + } + { + /* Cut off the low bits of the PT address. */ + shrui r6, r6, HV_LOG2_PAGE_TABLE_ALIGN + /* Start with our access pte. */ + move r5, r1 + } + { + /* Stuff the address into the page table pointer slot of the PTE. */ + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + { + /* Store the L0 data PTE. */ + st r0, r5 + addli r6, r6, (temp_code_pmd - temp_data_pmd) >> \ + HV_LOG2_PAGE_TABLE_ALIGN + } + { + addli r0, r0, .Lsv_code_pmd - .Lsv_data_pmd + bfins r5, r6, HV_PTE_INDEX_PTFN, \ + HV_PTE_INDEX_PTFN + HV_PTE_PTFN_BITS - 1 + } + /* Store the L0 code PTE. */ + st r0, r5 + +.Lno_write: + moveli lr, hw2_last(1f) + { + shl16insli lr, lr, hw1(1f) + moveli r0, hw1_last(swapper_pg_dir - PAGE_OFFSET) + } + { + shl16insli lr, lr, hw0(1f) + shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) + } + { + moveli r3, CTX_PAGE_FLAG + j _hv_install_context + } +1: + + /* Install the interrupt base. */ + moveli r0, hw2_last(intrpt_start) + shl16insli r0, r0, hw1(intrpt_start) + shl16insli r0, r0, hw0(intrpt_start) + mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0 + + /* Get our processor number and save it away in SAVE_K_0. */ + jal _hv_inquire_topology + { + GET_FIRST_INT(r5, r1) /* r5 = width */ + GET_SECOND_INT(r4, r0) /* r4 = y */ + } + { + GET_FIRST_INT(r6, r0) /* r6 = x */ + mul_lu_lu r4, r4, r5 + } + { + add r4, r4, r6 /* r4 == cpu == y*width + x */ + } + +#ifdef CONFIG_SMP + /* + * Load up our per-cpu offset. When the first (master) tile + * boots, this value is still zero, so we will load boot_pc + * with start_kernel, and boot_sp with the top of init_stack. 
+ * The master tile initializes the per-cpu offset array, so that + * when subsequent (secondary) tiles boot, they will instead load + * from their per-cpu versions of boot_sp and boot_pc. + */ + moveli r5, hw2_last(__per_cpu_offset) + shl16insli r5, r5, hw1(__per_cpu_offset) + shl16insli r5, r5, hw0(__per_cpu_offset) + shl3add r5, r4, r5 + ld r5, r5 + bnez r5, 1f + + /* + * Save the width and height to the smp_topology variable + * for later use. + */ + moveli r0, hw2_last(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw1(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + shl16insli r0, r0, hw0(smp_topology + HV_TOPOLOGY_WIDTH_OFFSET) + st r0, r1 +1: +#else + move r5, zero +#endif + + /* Load and go with the correct pc and sp. */ + { + moveli r1, hw2_last(boot_sp) + moveli r0, hw2_last(boot_pc) + } + { + shl16insli r1, r1, hw1(boot_sp) + shl16insli r0, r0, hw1(boot_pc) + } + { + shl16insli r1, r1, hw0(boot_sp) + shl16insli r0, r0, hw0(boot_pc) + } + { + add r1, r1, r5 + add r0, r0, r5 + } + ld r0, r0 + ld sp, r1 + shli r4, r4, CPU_SHIFT + bfins r4, sp, 0, CPU_SHIFT-1 + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ + { + move lr, zero /* stop backtraces in the called function */ + jr r0 + } + ENDPROC(_start) + +__PAGE_ALIGNED_BSS + .align PAGE_SIZE +ENTRY(empty_zero_page) + .fill PAGE_SIZE,1,0 + END(empty_zero_page) + + .macro PTE cpa, bits1 + .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ + HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ + (\bits1) | (HV_CPA_TO_PTFN(\cpa) << HV_PTE_INDEX_PTFN) + .endm + +__PAGE_ALIGNED_DATA + .align PAGE_SIZE +ENTRY(swapper_pg_dir) + .org swapper_pg_dir + PGD_INDEX(PAGE_OFFSET) * HV_PTE_SIZE +.Lsv_data_pmd: + .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + PGD_INDEX(MEM_SV_START) * HV_PTE_SIZE +.Lsv_code_pmd: + .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ + .org swapper_pg_dir + SIZEOF_PGD + END(swapper_pg_dir) + + .align HV_PAGE_TABLE_ALIGN 
+ENTRY(temp_data_pmd) + /* + * We fill the PAGE_OFFSET pmd with huge pages with + * VA = PA + PAGE_OFFSET. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept PTRS_PER_PMD + PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE + .set addr, addr + HPAGE_SIZE + .endr + .org temp_data_pmd + SIZEOF_PMD + END(temp_data_pmd) + + .align HV_PAGE_TABLE_ALIGN +ENTRY(temp_code_pmd) + /* + * We fill the MEM_SV_START pmd with huge pages with + * VA = PA + MEM_SV_START. We remap things with more precise access + * permissions later. + */ + .set addr, 0 + .rept PTRS_PER_PMD + PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE + .set addr, addr + HPAGE_SIZE + .endr + .org temp_code_pmd + SIZEOF_PMD + END(temp_code_pmd) + + /* + * Isolate swapper_pgprot to its own cache line, since each cpu + * starting up will read it using VA-is-PA and local homing. + * This would otherwise likely conflict with other data on the cache + * line, once we have set its permanent home in the page tables. 
+ */ + __INITDATA + .align CHIP_L2_LINE_SIZE() +ENTRY(swapper_pgprot) + .quad HV_PTE_PRESENT | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) + .align CHIP_L2_LINE_SIZE() + END(swapper_pgprot) diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S new file mode 100644 index 00000000000..2ab45662239 --- /dev/null +++ b/arch/tile/kernel/hvglue.S @@ -0,0 +1,74 @@ +/* Hypervisor call vector addresses; see <hv/hypervisor.h> */ +.macro gensym sym, val, size +.org \val +.global _\sym +.type _\sym,function +_\sym: +.size _\sym,\size +#ifndef CONFIG_TILE_HVGLUE_TRACE +.globl \sym +.set \sym,_\sym +#endif +.endm + +.section .hvglue,"x",@nobits +.align 8 +gensym hv_init, 0x20, 32 +gensym hv_install_context, 0x40, 32 +gensym hv_sysconf, 0x60, 32 +gensym hv_get_rtc, 0x80, 32 +gensym hv_set_rtc, 0xa0, 32 +gensym hv_flush_asid, 0xc0, 32 +gensym hv_flush_page, 0xe0, 32 +gensym hv_flush_pages, 0x100, 32 +gensym hv_restart, 0x120, 32 +gensym hv_halt, 0x140, 32 +gensym hv_power_off, 0x160, 32 +gensym hv_inquire_physical, 0x180, 32 +gensym hv_inquire_memory_controller, 0x1a0, 32 +gensym hv_inquire_virtual, 0x1c0, 32 +gensym hv_inquire_asid, 0x1e0, 32 +gensym hv_nanosleep, 0x200, 32 +gensym hv_console_read_if_ready, 0x220, 32 +gensym hv_console_write, 0x240, 32 +gensym hv_downcall_dispatch, 0x260, 32 +gensym hv_inquire_topology, 0x280, 32 +gensym hv_fs_findfile, 0x2a0, 32 +gensym hv_fs_fstat, 0x2c0, 32 +gensym hv_fs_pread, 0x2e0, 32 +gensym hv_physaddr_read64, 0x300, 32 +gensym hv_physaddr_write64, 0x320, 32 +gensym hv_get_command_line, 0x340, 32 +gensym hv_set_caching, 0x360, 32 +gensym hv_bzero_page, 0x380, 32 +gensym hv_register_message_state, 0x3a0, 32 +gensym hv_send_message, 0x3c0, 32 +gensym hv_receive_message, 0x3e0, 32 +gensym hv_inquire_context, 0x400, 32 +gensym hv_start_all_tiles, 0x420, 32 +gensym hv_dev_open, 0x440, 32 +gensym hv_dev_close, 0x460, 32 +gensym hv_dev_pread, 0x480, 32 +gensym hv_dev_pwrite, 0x4a0, 32 +gensym hv_dev_poll, 0x4c0, 32 +gensym 
hv_dev_poll_cancel, 0x4e0, 32 +gensym hv_dev_preada, 0x500, 32 +gensym hv_dev_pwritea, 0x520, 32 +gensym hv_flush_remote, 0x540, 32 +gensym hv_console_putc, 0x560, 32 +gensym hv_inquire_tiles, 0x580, 32 +gensym hv_confstr, 0x5a0, 32 +gensym hv_reexec, 0x5c0, 32 +gensym hv_set_command_line, 0x5e0, 32 +gensym hv_clear_intr, 0x600, 32 +gensym hv_enable_intr, 0x620, 32 +gensym hv_disable_intr, 0x640, 32 +gensym hv_raise_intr, 0x660, 32 +gensym hv_trigger_ipi, 0x680, 32 +gensym hv_store_mapping, 0x6a0, 32 +gensym hv_inquire_realpa, 0x6c0, 32 +gensym hv_flush_all, 0x6e0, 32 +gensym hv_get_ipi_pte, 0x700, 32 +gensym hv_set_pte_super_shift, 0x720, 32 +gensym hv_console_set_ipi, 0x7e0, 32 +gensym hv_glue_internals, 0x800, 30720 diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds deleted file mode 100644 index 2b7cd0a659a..00000000000 --- a/arch/tile/kernel/hvglue.lds +++ /dev/null @@ -1,58 +0,0 @@ -/* Hypervisor call vector addresses; see <hv/hypervisor.h> */ -hv_init = TEXT_OFFSET + 0x10020; -hv_install_context = TEXT_OFFSET + 0x10040; -hv_sysconf = TEXT_OFFSET + 0x10060; -hv_get_rtc = TEXT_OFFSET + 0x10080; -hv_set_rtc = TEXT_OFFSET + 0x100a0; -hv_flush_asid = TEXT_OFFSET + 0x100c0; -hv_flush_page = TEXT_OFFSET + 0x100e0; -hv_flush_pages = TEXT_OFFSET + 0x10100; -hv_restart = TEXT_OFFSET + 0x10120; -hv_halt = TEXT_OFFSET + 0x10140; -hv_power_off = TEXT_OFFSET + 0x10160; -hv_inquire_physical = TEXT_OFFSET + 0x10180; -hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0; -hv_inquire_virtual = TEXT_OFFSET + 0x101c0; -hv_inquire_asid = TEXT_OFFSET + 0x101e0; -hv_nanosleep = TEXT_OFFSET + 0x10200; -hv_console_read_if_ready = TEXT_OFFSET + 0x10220; -hv_console_write = TEXT_OFFSET + 0x10240; -hv_downcall_dispatch = TEXT_OFFSET + 0x10260; -hv_inquire_topology = TEXT_OFFSET + 0x10280; -hv_fs_findfile = TEXT_OFFSET + 0x102a0; -hv_fs_fstat = TEXT_OFFSET + 0x102c0; -hv_fs_pread = TEXT_OFFSET + 0x102e0; -hv_physaddr_read64 = TEXT_OFFSET + 0x10300; 
-hv_physaddr_write64 = TEXT_OFFSET + 0x10320; -hv_get_command_line = TEXT_OFFSET + 0x10340; -hv_set_caching = TEXT_OFFSET + 0x10360; -hv_bzero_page = TEXT_OFFSET + 0x10380; -hv_register_message_state = TEXT_OFFSET + 0x103a0; -hv_send_message = TEXT_OFFSET + 0x103c0; -hv_receive_message = TEXT_OFFSET + 0x103e0; -hv_inquire_context = TEXT_OFFSET + 0x10400; -hv_start_all_tiles = TEXT_OFFSET + 0x10420; -hv_dev_open = TEXT_OFFSET + 0x10440; -hv_dev_close = TEXT_OFFSET + 0x10460; -hv_dev_pread = TEXT_OFFSET + 0x10480; -hv_dev_pwrite = TEXT_OFFSET + 0x104a0; -hv_dev_poll = TEXT_OFFSET + 0x104c0; -hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0; -hv_dev_preada = TEXT_OFFSET + 0x10500; -hv_dev_pwritea = TEXT_OFFSET + 0x10520; -hv_flush_remote = TEXT_OFFSET + 0x10540; -hv_console_putc = TEXT_OFFSET + 0x10560; -hv_inquire_tiles = TEXT_OFFSET + 0x10580; -hv_confstr = TEXT_OFFSET + 0x105a0; -hv_reexec = TEXT_OFFSET + 0x105c0; -hv_set_command_line = TEXT_OFFSET + 0x105e0; -hv_clear_intr = TEXT_OFFSET + 0x10600; -hv_enable_intr = TEXT_OFFSET + 0x10620; -hv_disable_intr = TEXT_OFFSET + 0x10640; -hv_raise_intr = TEXT_OFFSET + 0x10660; -hv_trigger_ipi = TEXT_OFFSET + 0x10680; -hv_store_mapping = TEXT_OFFSET + 0x106a0; -hv_inquire_realpa = TEXT_OFFSET + 0x106c0; -hv_flush_all = TEXT_OFFSET + 0x106e0; -hv_get_ipi_pte = TEXT_OFFSET + 0x10700; -hv_glue_internals = TEXT_OFFSET + 0x10720; diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c new file mode 100644 index 00000000000..85c74ad2931 --- /dev/null +++ b/arch/tile/kernel/hvglue_trace.c @@ -0,0 +1,266 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* + * Pull in the hypervisor header so we declare all the ABI functions + * with the underscore versions, then undef the names so that we can + * provide our own wrapper versions. + */ +#define hv_init _hv_init +#define hv_install_context _hv_install_context +#define hv_sysconf _hv_sysconf +#define hv_get_rtc _hv_get_rtc +#define hv_set_rtc _hv_set_rtc +#define hv_flush_asid _hv_flush_asid +#define hv_flush_page _hv_flush_page +#define hv_flush_pages _hv_flush_pages +#define hv_restart _hv_restart +#define hv_halt _hv_halt +#define hv_power_off _hv_power_off +#define hv_inquire_physical _hv_inquire_physical +#define hv_inquire_memory_controller _hv_inquire_memory_controller +#define hv_inquire_virtual _hv_inquire_virtual +#define hv_inquire_asid _hv_inquire_asid +#define hv_nanosleep _hv_nanosleep +#define hv_console_read_if_ready _hv_console_read_if_ready +#define hv_console_write _hv_console_write +#define hv_downcall_dispatch _hv_downcall_dispatch +#define hv_inquire_topology _hv_inquire_topology +#define hv_fs_findfile _hv_fs_findfile +#define hv_fs_fstat _hv_fs_fstat +#define hv_fs_pread _hv_fs_pread +#define hv_physaddr_read64 _hv_physaddr_read64 +#define hv_physaddr_write64 _hv_physaddr_write64 +#define hv_get_command_line _hv_get_command_line +#define hv_set_caching _hv_set_caching +#define hv_bzero_page _hv_bzero_page +#define hv_register_message_state _hv_register_message_state +#define hv_send_message _hv_send_message +#define hv_receive_message _hv_receive_message +#define hv_inquire_context _hv_inquire_context +#define hv_start_all_tiles _hv_start_all_tiles +#define hv_dev_open _hv_dev_open +#define hv_dev_close _hv_dev_close +#define hv_dev_pread 
_hv_dev_pread +#define hv_dev_pwrite _hv_dev_pwrite +#define hv_dev_poll _hv_dev_poll +#define hv_dev_poll_cancel _hv_dev_poll_cancel +#define hv_dev_preada _hv_dev_preada +#define hv_dev_pwritea _hv_dev_pwritea +#define hv_flush_remote _hv_flush_remote +#define hv_console_putc _hv_console_putc +#define hv_inquire_tiles _hv_inquire_tiles +#define hv_confstr _hv_confstr +#define hv_reexec _hv_reexec +#define hv_set_command_line _hv_set_command_line +#define hv_clear_intr _hv_clear_intr +#define hv_enable_intr _hv_enable_intr +#define hv_disable_intr _hv_disable_intr +#define hv_raise_intr _hv_raise_intr +#define hv_trigger_ipi _hv_trigger_ipi +#define hv_store_mapping _hv_store_mapping +#define hv_inquire_realpa _hv_inquire_realpa +#define hv_flush_all _hv_flush_all +#define hv_get_ipi_pte _hv_get_ipi_pte +#define hv_set_pte_super_shift _hv_set_pte_super_shift +#define hv_console_set_ipi _hv_console_set_ipi +#include <hv/hypervisor.h> +#undef hv_init +#undef hv_install_context +#undef hv_sysconf +#undef hv_get_rtc +#undef hv_set_rtc +#undef hv_flush_asid +#undef hv_flush_page +#undef hv_flush_pages +#undef hv_restart +#undef hv_halt +#undef hv_power_off +#undef hv_inquire_physical +#undef hv_inquire_memory_controller +#undef hv_inquire_virtual +#undef hv_inquire_asid +#undef hv_nanosleep +#undef hv_console_read_if_ready +#undef hv_console_write +#undef hv_downcall_dispatch +#undef hv_inquire_topology +#undef hv_fs_findfile +#undef hv_fs_fstat +#undef hv_fs_pread +#undef hv_physaddr_read64 +#undef hv_physaddr_write64 +#undef hv_get_command_line +#undef hv_set_caching +#undef hv_bzero_page +#undef hv_register_message_state +#undef hv_send_message +#undef hv_receive_message +#undef hv_inquire_context +#undef hv_start_all_tiles +#undef hv_dev_open +#undef hv_dev_close +#undef hv_dev_pread +#undef hv_dev_pwrite +#undef hv_dev_poll +#undef hv_dev_poll_cancel +#undef hv_dev_preada +#undef hv_dev_pwritea +#undef hv_flush_remote +#undef hv_console_putc +#undef 
hv_inquire_tiles +#undef hv_confstr +#undef hv_reexec +#undef hv_set_command_line +#undef hv_clear_intr +#undef hv_enable_intr +#undef hv_disable_intr +#undef hv_raise_intr +#undef hv_trigger_ipi +#undef hv_store_mapping +#undef hv_inquire_realpa +#undef hv_flush_all +#undef hv_get_ipi_pte +#undef hv_set_pte_super_shift +#undef hv_console_set_ipi + +/* + * Provide macros based on <linux/syscalls.h> to provide a wrapper + * function that invokes the same function with an underscore prefix. + * We can't use the existing __SC_xxx macros because we need to + * support up to nine arguments rather than up to six, and also this + * way the file stands alone from possible changes in the + * implementation of <linux/syscalls.h>. + */ +#define HV_WRAP0(type, name) \ + type name(void); \ + type name(void) \ + { \ + return _##name(); \ + } +#define __HV_DECL1(t1, a1) t1 a1 +#define __HV_DECL2(t2, a2, ...) t2 a2, __HV_DECL1(__VA_ARGS__) +#define __HV_DECL3(t3, a3, ...) t3 a3, __HV_DECL2(__VA_ARGS__) +#define __HV_DECL4(t4, a4, ...) t4 a4, __HV_DECL3(__VA_ARGS__) +#define __HV_DECL5(t5, a5, ...) t5 a5, __HV_DECL4(__VA_ARGS__) +#define __HV_DECL6(t6, a6, ...) t6 a6, __HV_DECL5(__VA_ARGS__) +#define __HV_DECL7(t7, a7, ...) t7 a7, __HV_DECL6(__VA_ARGS__) +#define __HV_DECL8(t8, a8, ...) t8 a8, __HV_DECL7(__VA_ARGS__) +#define __HV_DECL9(t9, a9, ...) t9 a9, __HV_DECL8(__VA_ARGS__) +#define __HV_PASS1(t1, a1) a1 +#define __HV_PASS2(t2, a2, ...) a2, __HV_PASS1(__VA_ARGS__) +#define __HV_PASS3(t3, a3, ...) a3, __HV_PASS2(__VA_ARGS__) +#define __HV_PASS4(t4, a4, ...) a4, __HV_PASS3(__VA_ARGS__) +#define __HV_PASS5(t5, a5, ...) a5, __HV_PASS4(__VA_ARGS__) +#define __HV_PASS6(t6, a6, ...) a6, __HV_PASS5(__VA_ARGS__) +#define __HV_PASS7(t7, a7, ...) a7, __HV_PASS6(__VA_ARGS__) +#define __HV_PASS8(t8, a8, ...) a8, __HV_PASS7(__VA_ARGS__) +#define __HV_PASS9(t9, a9, ...) a9, __HV_PASS8(__VA_ARGS__) +#define HV_WRAPx(x, type, name, ...) 
\ + type name(__HV_DECL##x(__VA_ARGS__)); \ + type name(__HV_DECL##x(__VA_ARGS__)) \ + { \ + return _##name(__HV_PASS##x(__VA_ARGS__)); \ + } +#define HV_WRAP1(type, name, ...) HV_WRAPx(1, type, name, __VA_ARGS__) +#define HV_WRAP2(type, name, ...) HV_WRAPx(2, type, name, __VA_ARGS__) +#define HV_WRAP3(type, name, ...) HV_WRAPx(3, type, name, __VA_ARGS__) +#define HV_WRAP4(type, name, ...) HV_WRAPx(4, type, name, __VA_ARGS__) +#define HV_WRAP5(type, name, ...) HV_WRAPx(5, type, name, __VA_ARGS__) +#define HV_WRAP6(type, name, ...) HV_WRAPx(6, type, name, __VA_ARGS__) +#define HV_WRAP7(type, name, ...) HV_WRAPx(7, type, name, __VA_ARGS__) +#define HV_WRAP8(type, name, ...) HV_WRAPx(8, type, name, __VA_ARGS__) +#define HV_WRAP9(type, name, ...) HV_WRAPx(9, type, name, __VA_ARGS__) + +/* List all the hypervisor API functions. */ +HV_WRAP4(void, hv_init, HV_VersionNumber, interface_version_number, + int, chip_num, int, chip_rev_num, int, client_pl) +HV_WRAP1(long, hv_sysconf, HV_SysconfQuery, query) +HV_WRAP3(int, hv_confstr, HV_ConfstrQuery, query, HV_VirtAddr, buf, int, len) +#if CHIP_HAS_IPI() +HV_WRAP3(int, hv_get_ipi_pte, HV_Coord, tile, int, pl, HV_PTE*, pte) +HV_WRAP3(int, hv_console_set_ipi, int, ipi, int, event, HV_Coord, coord); +#else +HV_WRAP1(void, hv_enable_intr, HV_IntrMask, enab_mask) +HV_WRAP1(void, hv_disable_intr, HV_IntrMask, disab_mask) +HV_WRAP1(void, hv_clear_intr, HV_IntrMask, clear_mask) +HV_WRAP1(void, hv_raise_intr, HV_IntrMask, raise_mask) +HV_WRAP2(HV_Errno, hv_trigger_ipi, HV_Coord, tile, int, interrupt) +#endif /* !CHIP_HAS_IPI() */ +HV_WRAP3(int, hv_store_mapping, HV_VirtAddr, va, unsigned int, len, + HV_PhysAddr, pa) +HV_WRAP2(HV_PhysAddr, hv_inquire_realpa, HV_PhysAddr, cpa, unsigned int, len) +HV_WRAP0(HV_RTCTime, hv_get_rtc) +HV_WRAP1(void, hv_set_rtc, HV_RTCTime, time) +HV_WRAP4(int, hv_install_context, HV_PhysAddr, page_table, HV_PTE, access, + HV_ASID, asid, __hv32, flags) +HV_WRAP2(int, hv_set_pte_super_shift, int, level, int, 
log2_count) +HV_WRAP0(HV_Context, hv_inquire_context) +HV_WRAP1(int, hv_flush_asid, HV_ASID, asid) +HV_WRAP2(int, hv_flush_page, HV_VirtAddr, address, HV_PageSize, page_size) +HV_WRAP3(int, hv_flush_pages, HV_VirtAddr, start, HV_PageSize, page_size, + unsigned long, size) +HV_WRAP1(int, hv_flush_all, int, preserve_global) +HV_WRAP2(void, hv_restart, HV_VirtAddr, cmd, HV_VirtAddr, args) +HV_WRAP0(void, hv_halt) +HV_WRAP0(void, hv_power_off) +HV_WRAP1(int, hv_reexec, HV_PhysAddr, entry) +HV_WRAP0(HV_Topology, hv_inquire_topology) +HV_WRAP3(HV_Errno, hv_inquire_tiles, HV_InqTileSet, set, HV_VirtAddr, cpumask, + int, length) +HV_WRAP1(HV_PhysAddrRange, hv_inquire_physical, int, idx) +HV_WRAP2(HV_MemoryControllerInfo, hv_inquire_memory_controller, HV_Coord, coord, + int, controller) +HV_WRAP1(HV_VirtAddrRange, hv_inquire_virtual, int, idx) +HV_WRAP1(HV_ASIDRange, hv_inquire_asid, int, idx) +HV_WRAP1(void, hv_nanosleep, int, nanosecs) +HV_WRAP0(int, hv_console_read_if_ready) +HV_WRAP1(void, hv_console_putc, int, byte) +HV_WRAP2(int, hv_console_write, HV_VirtAddr, bytes, int, len) +HV_WRAP0(void, hv_downcall_dispatch) +HV_WRAP1(int, hv_fs_findfile, HV_VirtAddr, filename) +HV_WRAP1(HV_FS_StatInfo, hv_fs_fstat, int, inode) +HV_WRAP4(int, hv_fs_pread, int, inode, HV_VirtAddr, buf, + int, length, int, offset) +HV_WRAP2(unsigned long long, hv_physaddr_read64, HV_PhysAddr, addr, + HV_PTE, access) +HV_WRAP3(void, hv_physaddr_write64, HV_PhysAddr, addr, HV_PTE, access, + unsigned long long, val) +HV_WRAP2(int, hv_get_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP2(HV_Errno, hv_set_command_line, HV_VirtAddr, buf, int, length) +HV_WRAP1(void, hv_set_caching, unsigned long, bitmask) +HV_WRAP2(void, hv_bzero_page, HV_VirtAddr, va, unsigned int, size) +HV_WRAP1(HV_Errno, hv_register_message_state, HV_MsgState*, msgstate) +HV_WRAP4(int, hv_send_message, HV_Recipient *, recips, int, nrecip, + HV_VirtAddr, buf, int, buflen) +HV_WRAP3(HV_RcvMsgInfo, hv_receive_message, HV_MsgState, 
msgstate, + HV_VirtAddr, buf, int, buflen) +HV_WRAP0(void, hv_start_all_tiles) +HV_WRAP2(int, hv_dev_open, HV_VirtAddr, name, __hv32, flags) +HV_WRAP1(int, hv_dev_close, int, devhdl) +HV_WRAP5(int, hv_dev_pread, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP5(int, hv_dev_pwrite, int, devhdl, __hv32, flags, HV_VirtAddr, va, + __hv32, len, __hv64, offset) +HV_WRAP3(int, hv_dev_poll, int, devhdl, __hv32, events, HV_IntArg, intarg) +HV_WRAP1(int, hv_dev_poll_cancel, int, devhdl) +HV_WRAP6(int, hv_dev_preada, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP6(int, hv_dev_pwritea, int, devhdl, __hv32, flags, __hv32, sgl_len, + HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg) +HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa, + unsigned long, cache_control, unsigned long*, cache_cpumask, + HV_VirtAddr, tlb_va, unsigned long, tlb_length, + unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask, + HV_Remote_ASID*, asids, int, asidcount) diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c deleted file mode 100644 index 928b3187066..00000000000 --- a/arch/tile/kernel/init_task.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> -#include <linux/module.h> -#include <linux/start_kernel.h> -#include <linux/uaccess.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = { - INIT_THREAD_INFO(init_task) -}; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* - * per-CPU stack and boot info. - */ -DEFINE_PER_CPU(unsigned long, boot_sp) = - (unsigned long)init_stack + THREAD_SIZE; - -#ifdef CONFIG_SMP -DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; -#else -/* - * The variable must be __initdata since it references __init code. - * With CONFIG_SMP it is per-cpu data, which is exempt from validation. 
- */ -unsigned long __initdata boot_pc = (unsigned long)start_kernel; -#endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index fffcfa6b3a6..cdbda45a4e4 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -21,27 +21,17 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/irqflags.h> -#include <asm/atomic.h> +#include <asm/atomic_32.h> #include <asm/asm-offsets.h> #include <hv/hypervisor.h> #include <arch/abi.h> #include <arch/interrupts.h> #include <arch/spr_def.h> -#ifdef CONFIG_PREEMPT -# error "No support for kernel preemption currently" -#endif - #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) -#if !CHIP_HAS_WH64() - /* By making this an empty macro, we can use wh64 in the code. */ - .macro wh64 reg - .endm -#endif - .macro push_reg reg, ptr=sp, delta=-4 { sw \ptr, \reg @@ -189,7 +179,7 @@ intvec_\vecname: * point sp at the top aligned address on the actual stack page. 
*/ mfspr r0, SPR_SYSTEM_SAVE_K_0 - mm r0, r0, zero, LOG2_THREAD_SIZE, 31 + mm r0, r0, zero, LOG2_NR_CPU_IDS, 31 0: /* @@ -207,6 +197,9 @@ intvec_\vecname: * cache line 1: r14...r29 * cache line 0: 2 x frame, r0..r13 */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif andi r0, r0, -64 /* @@ -320,24 +313,20 @@ intvec_\vecname: movei r3, 0 } .else - .ifc \c_routine, op_handle_perf_interrupt + .ifc \c_routine, handle_perf_interrupt { mfspr r2, PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#if CHIP_HAS_AUX_PERF_COUNTERS() - .ifc \c_routine, op_handle_aux_perf_interrupt + .ifc \c_routine, handle_perf_interrupt { mfspr r2, AUX_PERF_COUNT_STS movei r3, -1 /* not used, but set for consistency */ } .else -#endif movei r3, 0 -#if CHIP_HAS_AUX_PERF_COUNTERS() .endif -#endif .endif .endif .endif @@ -354,7 +343,7 @@ intvec_\vecname: #ifdef __COLLECT_LINKER_FEEDBACK__ .pushsection .text.intvec_feedback,"ax" .org (\vecnum << 5) - FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) jrp lr .popsection #endif @@ -468,7 +457,7 @@ intvec_\vecname: } { auli r21, r21, ha16(__per_cpu_offset) - mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1 + mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1 } s2a r20, r20, r21 lw tp, r20 @@ -562,7 +551,6 @@ intvec_\vecname: .endif mtspr INTERRUPT_CRITICAL_SECTION, zero -#if CHIP_HAS_WH64() /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. 
We don't really @@ -583,7 +571,6 @@ intvec_\vecname: addi r52, r52, -64 } wh64 r52 -#endif #ifdef CONFIG_TRACE_IRQFLAGS .ifnc \function,handle_nmi @@ -762,7 +749,7 @@ intvec_\vecname: .macro dc_dispatch vecnum, vecname .org (\vecnum << 8) intvec_\vecname: - j hv_downcall_dispatch + j _hv_downcall_dispatch ENDPROC(intvec_\vecname) .endm @@ -799,6 +786,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -808,17 +799,37 @@ STD_ENTRY(interrupt_return) } lw r29, r29 andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ + bzt r29, .Lresume_userspace + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + GET_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET { - bzt r29, .Lresume_userspace - PTREGS_PTR(r29, PTREGS_OFFSET_PC) + lw r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET } + { + andi r28, r28, _TIF_NEED_RESCHED + lw r29, r29 + } + bzt r28, 1f + bnz r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ { - lw r28, r29 + PTREGS_PTR(r29, PTREGS_OFFSET_PC) moveli r27, lo16(_cpu_idle_nap) } { + lw r28, r29 auli r27, r27, ha16(_cpu_idle_nap) } { @@ -835,6 +846,18 @@ STD_ENTRY(interrupt_return) FEEDBACK_REENTER(interrupt_return) /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. 
+ * Get base of stack in r32. + */ + { + GET_THREAD_INFO(r32) + movei r33, 0 + } + +.Lretry_work_pending: + /* * Disable interrupts so as to make sure we don't * miss an interrupt that sets any of the thread flags (like * need_resched or sigpending) between sampling and the iret. @@ -844,21 +867,33 @@ STD_ENTRY(interrupt_return) IRQ_DISABLE(r20, r21) TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ - /* Get base of stack in r32; note r30/31 are used as arguments here. */ - GET_THREAD_INFO(r32) - /* Check to see if there is any work to do before returning to user. */ { addi r29, r32, THREAD_INFO_FLAGS_OFFSET - moveli r28, lo16(_TIF_ALLWORK_MASK) + moveli r1, lo16(_TIF_ALLWORK_MASK) } { lw r29, r29 - auli r28, r28, ha16(_TIF_ALLWORK_MASK) + auli r1, r1, ha16(_TIF_ALLWORK_MASK) } - and r28, r29, r28 - bnz r28, .Lwork_pending + and r1, r29, r1 + bzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling, notify-resume, or single-step. Call out to C + * code to figure out exactly what we need to do for each flag bit, + * then if necessary, reload the flags and recheck. 
+ */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnz r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnz r0, .Lretry_work_pending /* * In the NMI case we @@ -911,6 +946,13 @@ STD_ENTRY(interrupt_return) bzt r30, .Lrestore_regs 3: + /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */ + { + moveli r0, lo16(1 << (INT_PERF_COUNT - 32)) + bz r31, .Lrestore_regs + } + auli r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32)) + mtspr SPR_INTERRUPT_MASK_RESET_K_1, r0 /* * We now commit to returning from this interrupt, since we will be @@ -1099,99 +1141,6 @@ STD_ENTRY(interrupt_return) pop_reg r50 pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) j .Lcontinue_restore_regs - -.Lwork_pending: - /* Mask the reschedule flag */ - andi r28, r29, _TIF_NEED_RESCHED - - { - /* - * If the NEED_RESCHED flag is called, we call schedule(), which - * may drop this context right here and go do something else. - * On return, jump back to .Lresume_userspace and recheck. - */ - bz r28, .Lasync_tlb - - /* Mask the async-tlb flag */ - andi r28, r29, _TIF_ASYNC_TLB - } - - jal schedule - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lasync_tlb: - { - bz r28, .Lneed_sigpending - - /* Mask the sigpending flag */ - andi r28, r29, _TIF_SIGPENDING - } - - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal do_async_page_fault - FEEDBACK_REENTER(interrupt_return) - - /* - * Go restart the "resume userspace" process. We may have - * fired a signal, and we need to disable interrupts again. - */ - j .Lresume_userspace - -.Lneed_sigpending: - /* - * At this point we are either doing signal handling or single-step, - * so either way make sure we have all the registers saved. 
- */ - push_extra_callee_saves r0 - - { - /* If no signal pending, skip to singlestep check */ - bz r28, .Lneed_singlestep - - /* Mask the singlestep flag */ - andi r28, r29, _TIF_SINGLESTEP - } - - jal do_signal - FEEDBACK_REENTER(interrupt_return) - - /* Reload the flags and check again */ - j .Lresume_userspace - -.Lneed_singlestep: - { - /* Get a pointer to the EX1 field */ - PTREGS_PTR(r29, PTREGS_OFFSET_EX1) - - /* If we get here, our bit must be set. */ - bz r28, .Lwork_confusion - } - /* If we are in priv mode, don't single step */ - lw r28, r29 - andi r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ - bnz r28, .Lrestore_all - - /* Allow interrupts within the single step code */ - TRACE_IRQS_ON /* Note: clobbers registers r0-r29 */ - IRQ_ENABLE(r20, r21) - - /* try to single-step the current instruction */ - PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal single_step_once - FEEDBACK_REENTER(interrupt_return) - - /* Re-disable interrupts. TRACE_IRQS_OFF in .Lrestore_all. */ - IRQ_DISABLE(r20,r21) - - j .Lrestore_all - -.Lwork_confusion: - move r0, r28 - panic "thread_info allwork flags unhandled on userspace resume: %#x" - STD_ENDPROC(interrupt_return) /* @@ -1229,6 +1178,10 @@ handle_nmi: PTREGS_PTR(r0, PTREGS_OFFSET_BASE) } FEEDBACK_REENTER(handle_nmi) + { + movei r30, 1 + seq r31, r0, zero + } j interrupt_return STD_ENDPROC(handle_nmi) @@ -1256,15 +1209,20 @@ handle_syscall: add r20, r20, tp lw r21, r20 addi r21, r21, 1 - sw r20, r21 + { + sw r20, r21 + GET_THREAD_INFO(r31) + } /* Trace syscalls, if requested. 
*/ - GET_THREAD_INFO(r31) addi r31, r31, THREAD_INFO_FLAGS_OFFSET lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE bzt r30, .Lrestore_syscall_regs - jal do_syscall_trace + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_enter + } FEEDBACK_REENTER(handle_syscall) /* @@ -1315,9 +1273,15 @@ handle_syscall: lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE bzt r30, 1f - jal do_syscall_trace + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_exit + } FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1326,7 +1290,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1342,9 +1309,27 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) +STD_ENTRY(ret_from_kernel_thread) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + move r0, r31 + jalr r30 + } + FEEDBACK_REENTER(ret_from_kernel_thread) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_kernel_thread) + /* * Code for ill interrupt. */ @@ -1429,7 +1414,10 @@ handle_ill: 3: /* set PC and continue */ lw r26, r24 - sw r28, r26 + { + sw r28, r26 + GET_THREAD_INFO(r0) + } /* * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. 
@@ -1437,7 +1425,6 @@ handle_ill: * need to clear it here and can't really impose on all other arches. * So what's another write between friends? */ - GET_THREAD_INFO(r0) addi r1, r0, THREAD_INFO_FLAGS_OFFSET { @@ -1451,12 +1438,14 @@ handle_ill: { lw r0, r0 /* indirect thru thread_info to get task_info*/ addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ - move r2, zero /* load error code into r2 */ } jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { @@ -1486,15 +1475,6 @@ STD_ENTRY_LOCAL(bad_intr) panic "Unhandled interrupt %#x: PC %#lx" STD_ENDPROC(bad_intr) -/* Put address of pt_regs in reg and jump. */ -#define PTREGS_SYSCALL(x, reg) \ - STD_ENTRY(_##x); \ - { \ - PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ - j x \ - }; \ - STD_ENDPROC(_##x) - /* * Special-case sigreturn to not write r0 to the stack on return. * This is technically more efficient, but it also avoids difficulties @@ -1510,12 +1490,9 @@ STD_ENTRY_LOCAL(bad_intr) }; \ STD_ENDPROC(_##x) -PTREGS_SYSCALL(sys_execve, r3) -PTREGS_SYSCALL(sys_sigaltstack, r2) PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) -PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) -/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +/* Save additional callee-saves to pt_regs and jump to standard function. */ STD_ENTRY(_sys_clone) push_extra_callee_saves r4 j sys_clone @@ -1550,17 +1527,18 @@ STD_ENTRY(_sys_clone) * We place it in the __HEAD section to ensure it is relatively * near to the intvec_SWINT_1 code (reachable by a conditional branch). * - * Must match register usage in do_page_fault(). + * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics(). + * + * As we do in lib/atomic_asm_32.S, we bypass a store if the value we + * would store is the same as the value we just loaded. 
*/ __HEAD .align 64 /* Align much later jump on the start of a cache line. */ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() nop #if PAGE_SIZE >= 0x10000 nop #endif -#endif ENTRY(sys_cmpxchg) /* @@ -1594,64 +1572,6 @@ ENTRY(sys_cmpxchg) # error Code here assumes PAGE_OFFSET can be loaded with just hi16() #endif -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - { - /* Check for unaligned input. */ - bnz sp, .Lcmpxchg_badaddr - mm r25, r0, zero, 3, PAGE_SHIFT-1 - } - { - crc32_32 r25, zero, r25 - moveli r21, lo16(atomic_lock_ptr) - } - { - auli r21, r21, ha16(atomic_lock_ptr) - auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */ - } - { - shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT - slt_u r23, r0, r23 - - /* - * Ensure that the TLB is loaded before we take out the lock. - * On TILEPro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. - */ - lw r26, r0 - } - { - s2a r21, r20, r21 - bbns r23, .Lcmpxchg_badaddr - } - { - lw r21, r21 - seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64 - andi r25, r25, ATOMIC_HASH_L2_SIZE - 1 - } - { - /* Branch away at this point if we're doing a 64-bit cmpxchg. */ - bbs r23, .Lcmpxchg64 - andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */ - } - - { - /* - * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in - * the icache when we start. This instruction (the jump) is - * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. - */ - s2a ATOMIC_LOCK_REG_NAME, r25, r21 - j .Lcmpxchg32_tns - } - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ { /* Check for unaligned input. 
*/ bnz sp, .Lcmpxchg_badaddr @@ -1665,7 +1585,7 @@ ENTRY(sys_cmpxchg) * Because of C pointer arithmetic, we want to compute this: * * ((char*)atomic_locks + - * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2)) + * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2)) * * Instead of two shifts we just ">> 1", and use 'mm' * to ignore the low and high bits we don't want. @@ -1676,12 +1596,9 @@ ENTRY(sys_cmpxchg) /* * Ensure that the TLB is loaded before we take out the lock. - * On tilepro, this will start fetching the value all the way - * into our L1 as well (and if it gets modified before we - * grab the lock, it will be invalidated from our cache - * before we reload it). On tile64, we'll start fetching it - * into our L1 if we're the home, and if we're not, we'll - * still at least start fetching it into the home's L2. + * This will start fetching the value all the way into our L1 + * as well (and if it gets modified before we grab the lock, + * it will be invalidated from our cache before we reload it). */ lw r26, r0 } @@ -1713,24 +1630,23 @@ ENTRY(sys_cmpxchg) { /* * We very carefully align the code that actually runs with - * the lock held (nine bundles) so that we know it is all in + * the lock held (twelve bundles) so that we know it is all in * the icache when we start. This instruction (the jump) is * at the start of the first cache line, address zero mod 64; - * we jump to somewhere in the second cache line to issue the - * tns, then jump back to finish up. + * we jump to the very end of the second cache line to get that + * line loaded in the icache, then fall through to issue the tns + * in the third cache line, at which point it's all cached. + * Note that this is for performance, not correctness. */ j .Lcmpxchg32_tns } -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - - ENTRY(__sys_cmpxchg_grab_lock) +/* Symbol for do_page_fault_ics() to use to compare against the PC.
*/ +.global __sys_cmpxchg_grab_lock +__sys_cmpxchg_grab_lock: /* * Perform the actual cmpxchg or atomic_update. - * Note that the system <arch/atomic.h> header relies on - * atomic_update() to always perform an "mf", so don't make - * it optional or conditional without modifying that code. */ .Ldo_cmpxchg32: { @@ -1748,10 +1664,13 @@ ENTRY(sys_cmpxchg) } { mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */ - bbns r22, .Lcmpxchg32_mismatch + bbns r22, .Lcmpxchg32_nostore } + seq r22, r24, r21 /* Are we storing the value we loaded? */ + bbs r22, .Lcmpxchg32_nostore sw r0, r24 + /* The following instruction is the start of the second cache line. */ /* Do slow mtspr here so the following "mf" waits less. */ { move sp, r27 @@ -1759,7 +1678,6 @@ ENTRY(sys_cmpxchg) } mf - /* The following instruction is the start of the second cache line. */ { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1767,7 +1685,7 @@ ENTRY(sys_cmpxchg) iret /* Duplicated code here in the case where we don't overlap "mf" */ -.Lcmpxchg32_mismatch: +.Lcmpxchg32_nostore: { move r0, r21 sw ATOMIC_LOCK_REG_NAME, zero @@ -1783,8 +1701,6 @@ ENTRY(sys_cmpxchg) * and for 64-bit cmpxchg. We provide it as a macro and put * it into both versions. We can't share the code literally * since it depends on having the right branch-back address. - * Note that the first few instructions should share the cache - * line with the second half of the actual locked code. */ .macro cmpxchg_lock, bitwidth @@ -1810,7 +1726,7 @@ ENTRY(sys_cmpxchg) } /* * The preceding instruction is the last thing that must be - * on the second cache line. + * hot in the icache before we do the "tns" above. */ #ifdef CONFIG_SMP @@ -1841,6 +1757,12 @@ ENTRY(sys_cmpxchg) .endm .Lcmpxchg32_tns: + /* + * This is the last instruction on the second cache line. + * The nop here loads the second line, then we fall through + * to the tns to load the third line before we take the lock. 
+ */ + nop cmpxchg_lock 32 /* @@ -1856,9 +1778,6 @@ ENTRY(sys_cmpxchg) .align 64 .Lcmpxchg64: { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - s2a ATOMIC_LOCK_REG_NAME, r25, r21 -#endif bzt r23, .Lcmpxchg64_tns } j .Lcmpxchg_badaddr @@ -1924,11 +1843,12 @@ int_unalign: push_extra_callee_saves r0 j do_trap -/* Include .intrpt1 array of interrupt vectors */ - .section ".intrpt1", "ax" +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" -#define op_handle_perf_interrupt bad_intr -#define op_handle_aux_perf_interrupt bad_intr +#ifndef CONFIG_USE_PMC +#define handle_perf_interrupt bad_intr +#endif #ifndef CONFIG_HARDWALL #define do_hardwall_trap bad_intr @@ -1969,7 +1889,7 @@ int_unalign: int_hand INT_IDN_AVAIL, IDN_AVAIL, bad_intr int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr int_hand INT_PERF_COUNT, PERF_COUNT, \ - op_handle_perf_interrupt, handle_nmi + handle_perf_interrupt, handle_nmi int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr #if CONFIG_KERNEL_PL == 2 dc_dispatch INT_INTCTRL_2, INTCTRL_2 @@ -1993,10 +1913,8 @@ int_unalign: do_page_fault int_hand INT_SN_CPL, SN_CPL, bad_intr int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap -#if CHIP_HAS_AUX_PERF_COUNTERS() int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ - op_handle_aux_perf_interrupt, handle_nmi -#endif + handle_perf_interrupt, handle_nmi /* Synthetic interrupt delivered only by the simulator */ int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S new file mode 100644 index 00000000000..5b67efcecab --- /dev/null +++ b/arch/tile/kernel/intvec_64.S @@ -0,0 +1,1573 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Linux interrupt vectors. + */ + +#include <linux/linkage.h> +#include <linux/errno.h> +#include <linux/unistd.h> +#include <linux/init.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/irqflags.h> +#include <asm/asm-offsets.h> +#include <asm/types.h> +#include <asm/traps.h> +#include <asm/signal.h> +#include <hv/hypervisor.h> +#include <arch/abi.h> +#include <arch/interrupts.h> +#include <arch/spr_def.h> + +#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) + +#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) + +#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2 +/* + * Set "result" non-zero if ex1 holds the PL of the kernel + * (with or without ICS being set). Note this works only + * because we never find the PL at level 3. 
+ */ +# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL +#else +# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL +#endif + + .macro push_reg reg, ptr=sp, delta=-8 + { + st \ptr, \reg + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg reg, ptr=sp, delta=8 + { + ld \reg, \ptr + addli \ptr, \ptr, \delta + } + .endm + + .macro pop_reg_zero reg, zreg, ptr=sp, delta=8 + { + move \zreg, zero + ld \reg, \ptr + addi \ptr, \ptr, \delta + } + .endm + + .macro push_extra_callee_saves reg + PTREGS_PTR(\reg, PTREGS_OFFSET_REG(51)) + push_reg r51, \reg + push_reg r50, \reg + push_reg r49, \reg + push_reg r48, \reg + push_reg r47, \reg + push_reg r46, \reg + push_reg r45, \reg + push_reg r44, \reg + push_reg r43, \reg + push_reg r42, \reg + push_reg r41, \reg + push_reg r40, \reg + push_reg r39, \reg + push_reg r38, \reg + push_reg r37, \reg + push_reg r36, \reg + push_reg r35, \reg + push_reg r34, \reg, PTREGS_OFFSET_BASE - PTREGS_OFFSET_REG(34) + .endm + + .macro panic str + .pushsection .rodata, "a" +1: + .asciz "\str" + .popsection + { + moveli r0, hw2_last(1b) + } + { + shl16insli r0, r0, hw1(1b) + } + { + shl16insli r0, r0, hw0(1b) + jal panic + } + .endm + + /* + * Unalign data exception fast handling: In order to handle + * unaligned data access, a fast JIT version is generated and stored + * in a specific area in user space. We first need to do a quick poke + * to see if the JIT is available. We use certain bits in the fault + * PC (3 to 9 is used for 16KB page size) as index to address the JIT + * code area. The first 64bit word is the fault PC, and the 2nd one is + * the fault bundle itself. If these 2 words both match, then we + * directly "iret" to JIT code. If not, a slow path is invoked to + * generate new JIT code. Note: the current JIT code WILL be + * overwritten if it existed. So, ideally we can handle 128 unalign + * fixups via JIT. 
For lookup efficiency and to effectively support + * tight loops with multiple unaligned reference, a simple + * direct-mapped cache is used. + * + * SPR_EX_CONTEXT_K_0 is modified to return to JIT code. + * SPR_EX_CONTEXT_K_1 has ICS set. + * SPR_EX_CONTEXT_0_0 is setup to user program's next PC. + * SPR_EX_CONTEXT_0_1 = 0. + */ + .macro int_hand_unalign_fast vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + /* Put r3 in SPR_SYSTEM_SAVE_K_1. */ + mtspr SPR_SYSTEM_SAVE_K_1, r3 + + mfspr r3, SPR_EX_CONTEXT_K_1 + /* + * Examine if exception comes from user without ICS set. + * If not, just go directly to the slow path. + */ + bnez r3, hand_unalign_slow_nonuser + + mfspr r3, SPR_SYSTEM_SAVE_K_0 + + /* Get &thread_info->unalign_jit_tmp[0] in r3. */ + bfexts r3, r3, 0, CPU_SHIFT-1 + mm r3, zero, LOG2_THREAD_SIZE, 63 + addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + + /* + * Save r0, r1, r2 into thread_info array r3 points to + * from low to high memory in order. + */ + st_add r3, r0, 8 + st_add r3, r1, 8 + { + st_add r3, r2, 8 + andi r2, sp, 7 + } + + /* Save stored r3 value so we can revert it on a page fault. */ + mfspr r1, SPR_SYSTEM_SAVE_K_1 + st r3, r1 + + { + /* Generate a SIGBUS if sp is not 8-byte aligned. */ + bnez r2, hand_unalign_slow_badsp + } + + /* + * Get the thread_info in r0; load r1 with pc. Set the low bit of sp + * as an indicator to the page fault code in case we fault. + */ + { + ori sp, sp, 1 + mfspr r1, SPR_EX_CONTEXT_K_0 + } + + /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */ + { + addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \ + (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8)) + bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT) + } + + /* Load the jit_info; multiply r2 by 128. */ + { + ld r0, r0 + shli r2, r2, UNALIGN_JIT_SHIFT + } + + /* + * If r0 is NULL, the JIT page is not mapped, so go to slow path; + * add offset r2 to r0 at the same time. 
+ */ + { + beqz r0, hand_unalign_slow + add r2, r0, r2 + } + + /* + * We are loading from userspace (both the JIT info PC and + * instruction word, and the instruction word we executed) + * and since either could fault while holding the interrupt + * critical section, we must tag this region and check it in + * do_page_fault() to handle it properly. + */ +ENTRY(__start_unalign_asm_code) + + /* Load first word of JIT in r0 and increment r2 by 8. */ + ld_add r0, r2, 8 + + /* + * Compare the PC with the 1st word in JIT; load the fault bundle + * into r1. + */ + { + cmpeq r0, r0, r1 + ld r1, r1 + } + + /* Go to slow path if PC doesn't match. */ + beqz r0, hand_unalign_slow + + /* + * Load the 2nd word of JIT, which is supposed to be the fault + * bundle for a cache hit. Increment r2; after this bundle r2 will + * point to the potential start of the JIT code we want to run. + */ + ld_add r0, r2, 8 + + /* No further accesses to userspace are done after this point. */ +ENTRY(__end_unalign_asm_code) + + /* Compare the real bundle with what is saved in the JIT area. */ + { + cmpeq r0, r1, r0 + mtspr SPR_EX_CONTEXT_0_1, zero + } + + /* Go to slow path if the fault bundle does not match. */ + beqz r0, hand_unalign_slow + + /* + * A cache hit is found. + * r2 points to start of JIT code (3rd word). + * r0 is the fault pc. + * r1 is the fault bundle. + * Reset the low bit of sp. + */ + { + mfspr r0, SPR_EX_CONTEXT_K_0 + andi sp, sp, ~1 + } + + /* Write r2 into EX_CONTEXT_K_0 and increment PC. */ + { + mtspr SPR_EX_CONTEXT_K_0, r2 + addi r0, r0, 8 + } + + /* + * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to + * user with ICS set. This way, if the JIT fixup causes another + * unalign exception (which shouldn't be possible) the user + * process will be terminated with SIGBUS. Also, our fixup will + * run without interleaving with external interrupts. + * Each fixup is at most 14 bundles, so it won't hold ICS for long. 
+ */ + { + movei r1, PL_ICS_EX1(USER_PL, 1) + mtspr SPR_EX_CONTEXT_0_0, r0 + } + + { + mtspr SPR_EX_CONTEXT_K_1, r1 + addi r3, r3, -(3 * 8) + } + + /* Restore r0..r3. */ + ld_add r0, r3, 8 + ld_add r1, r3, 8 + ld_add r2, r3, 8 + ld r3, r3 + + iret + ENDPROC(intvec_\vecname) + .endm + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" +intvec_feedback: + .popsection +#endif + + /* + * Default interrupt handler. + * + * vecnum is where we'll put this code. + * c_routine is the C routine we'll call. + * + * The C routine is passed two arguments: + * - A pointer to the pt_regs state. + * - The interrupt vector number. + * + * The "processing" argument specifies the code for processing + * the interrupt. Defaults to "handle_interrupt". + */ + .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt +intvec_\vecname: + /* Temporarily save a register so we have somewhere to work. */ + + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 + + /* + * The unalign data fastpath code sets the low bit in sp to + * force us to reset it here on fault. + */ + { + blbs sp, 2f + IS_KERNEL_EX1(r0, r0) + } + + .ifc \vecnum, INT_DOUBLE_FAULT + /* + * For double-faults from user-space, fall through to the normal + * register save and stack setup path. Otherwise, it's the + * hypervisor giving us one last chance to dump diagnostics, and we + * branch to the kernel_double_fault routine to do so. + */ + beqz r0, 1f + j _kernel_double_fault +1: + .else + /* + * If we're coming from user-space, then set sp to the top of + * the kernel stack. Otherwise, assume sp is already valid. + */ + { + bnez r0, 0f + move r0, sp + } + .endif + + .ifc \c_routine, do_page_fault + /* + * The page_fault handler may be downcalled directly by the + * hypervisor even when Linux is running and has ICS set. 
+ * + * In this case the contents of EX_CONTEXT_K_1 reflect the + * previous fault and can't be relied on to choose whether or + * not to reinitialize the stack pointer. So we add a test + * to see whether SYSTEM_SAVE_K_2 has the high bit set, + * and if so we don't reinitialize sp, since we must be coming + * from Linux. (In fact the precise case is !(val & ~1), + * but any Linux PC has to have the high bit set.) + * + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for + * any path that turns into a downcall to one of our TLB handlers. + * + * FIXME: if we end up never using this path, perhaps we should + * prevent the hypervisor from generating downcalls in this case. + * The advantage of getting a downcall is we can panic in Linux. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_2 + { + bltz r0, 0f /* high bit set in SYSTEM_SAVE_K_2: came from Linux, keep sp */ + move r0, sp + } + .endif + +2: + /* + * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and + * the current stack top in the lower bits. So we recover + * our starting stack value by sign-extending the low bits, then + * point sp at the top aligned address on the actual stack page. + */ + mfspr r0, SPR_SYSTEM_SAVE_K_0 + bfexts r0, r0, 0, CPU_SHIFT-1 + +0: + /* + * Align the stack mod 64 so we can properly predict what + * cache lines we need to write-hint to reduce memory fetch + * latency as we enter the kernel. The layout of memory is + * as follows, with cache line 0 at the lowest VA, and cache + * line 8 just below the r0 value this "andi" computes. + * Note that we never write to cache line 8, and we skip + * cache lines 1-3 for syscalls.
+ * + * cache line 8: ptregs padding (two words) + * cache line 7: sp, lr, pc, ex1, faultnum, orig_r0, flags, cmpexch + * cache line 6: r46...r53 (tp) + * cache line 5: r38...r45 + * cache line 4: r30...r37 + * cache line 3: r22...r29 + * cache line 2: r14...r21 + * cache line 1: r6...r13 + * cache line 0: 2 x frame, r0..r5 + */ +#if STACK_TOP_DELTA != 64 +#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs() +#endif + andi r0, r0, -64 + + /* + * Push the first four registers on the stack, so that we can set + * them to vector-unique values before we jump to the common code. + * + * Registers are pushed on the stack as a struct pt_regs, + * with the sp initially just above the struct, and when we're + * done, sp points to the base of the struct, minus + * C_ABI_SAVE_AREA_SIZE, so we can directly jal to C code. + * + * This routine saves just the first four registers, plus the + * stack context so we can do proper backtracing right away, + * and defers to handle_interrupt to save the rest. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum, + * and needs sp set to its final location at the bottom of + * the stack frame. + */ + addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) + wh64 r0 /* cache line 7 */ + { + st r0, lr + addli r0, r0, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + } + { + st r0, sp + addli sp, r0, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_SP + } + wh64 sp /* cache line 6 */ + { + st sp, r52 + addli sp, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(52) + } + wh64 sp /* cache line 0 */ + { + st sp, r1 + addli sp, sp, PTREGS_OFFSET_REG(2) - PTREGS_OFFSET_REG(1) + } + { + st sp, r2 + addli sp, sp, PTREGS_OFFSET_REG(3) - PTREGS_OFFSET_REG(2) + } + { + st sp, r3 + addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) + } + mfspr r0, SPR_EX_CONTEXT_K_0 + .ifc \processing,handle_syscall + /* + * Bump the saved PC by one bundle so that when we return, we won't + * execute the same swint instruction again. 
We need to do this while + * we're in the critical section. + */ + addi r0, r0, 8 + .endif + { + st sp, r0 + addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r0, SPR_EX_CONTEXT_K_1 + { + st sp, r0 + addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + /* + * Use r0 for syscalls so it's a temporary; use r1 for interrupts + * so that it gets passed through unchanged to the handler routine. + * Note that the .if conditional confusingly spans bundles. + */ + .ifc \processing,handle_syscall + movei r0, \vecnum + } + { + st sp, r0 + .else + movei r1, \vecnum + } + { + st sp, r1 + .endif + addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM + } + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ + { + st sp, r0 + addi sp, sp, -PTREGS_OFFSET_REG(0) - 8 + } + { + st sp, zero /* write zero into "Next SP" frame pointer */ + addi sp, sp, -8 /* leave SP pointing at bottom of frame */ + } + .ifc \processing,handle_syscall + j handle_syscall + .else + /* Capture per-interrupt SPR context to registers. 
*/ + .ifc \c_routine, do_page_fault + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ + .else + .ifc \vecnum, INT_ILL_TRANS + mfspr r2, ILL_VA_PC + .else + .ifc \vecnum, INT_DOUBLE_FAULT + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ + .else + .ifc \c_routine, do_trap + mfspr r2, GPV_REASON + .else + .ifc \c_routine, handle_perf_interrupt + /* + * One C routine name is tested for both status SPRs, so + * select the SPR by vector number; otherwise the aux + * counter status could never be read. + */ + .ifc \vecnum, INT_AUX_PERF_COUNT + mfspr r2, AUX_PERF_COUNT_STS + .else + mfspr r2, PERF_COUNT_STS + .endif + .endif + .endif + .endif + .endif + .endif + /* Put function pointer in r0 */ + moveli r0, hw2_last(\c_routine) + shl16insli r0, r0, hw1(\c_routine) + { + shl16insli r0, r0, hw0(\c_routine) + j \processing + } + .endif + ENDPROC(intvec_\vecname) + +#ifdef __COLLECT_LINKER_FEEDBACK__ + .pushsection .text.intvec_feedback,"ax" + .org (\vecnum << 5) + FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8) + jrp lr + .popsection +#endif + + .endm + + + /* + * Save the rest of the registers that we didn't save in the actual + * vector itself. We can't use r0-r10 inclusive here. + */ + .macro finish_interrupt_save, function + + /* If it's a syscall, save a proper orig_r0, otherwise just zero. */ + PTREGS_PTR(r52, PTREGS_OFFSET_ORIG_R0) + { + .ifc \function,handle_syscall + st r52, r0 + .else + st r52, zero + .endif + PTREGS_PTR(r52, PTREGS_OFFSET_TP) + } + st r52, tp + { + mfspr tp, CMPEXCH_VALUE + PTREGS_PTR(r52, PTREGS_OFFSET_CMPEXCH) + } + + /* + * For ordinary syscalls, we save neither caller- nor callee- + * save registers, since the syscall invoker doesn't expect the + * caller-saves to be saved, and the called kernel functions will + * take care of saving the callee-saves for us. + * + * For interrupts we save just the caller-save registers. Saving + * them is required (since the "caller" can't save them). Again, + * the called kernel functions will restore the callee-save + * registers for us appropriately.
+ * + * On return, we normally restore nothing special for syscalls, + * and just the caller-save registers for interrupts. + * + * However, there are some important caveats to all this: + * + * - We always save a few callee-save registers to give us + * some scratchpad registers to carry across function calls. + * + * - fork/vfork/etc require us to save all the callee-save + * registers, which we do in PTREGS_SYSCALL_ALL_REGS, below. + * + * - We always save r0..r5 and r10 for syscalls, since we need + * to reload them a bit later for the actual kernel call, and + * since we might need them for -ERESTARTNOINTR, etc. + * + * - Before invoking a signal handler, we save the unsaved + * callee-save registers so they are visible to the + * signal handler or any ptracer. + * + * - If the unsaved callee-save registers are modified, we set + * a bit in pt_regs so we know to reload them from pt_regs + * and not just rely on the kernel function unwinding. + * (Done for ptrace register writes and SA_SIGINFO handler.) 
+ */ + { + st r52, tp + PTREGS_PTR(r52, PTREGS_OFFSET_REG(33)) + } + wh64 r52 /* cache line 4 */ + push_reg r33, r52 + push_reg r32, r52 + push_reg r31, r52 + .ifc \function,handle_syscall + push_reg r30, r52, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(30) + push_reg TREG_SYSCALL_NR_NAME, r52, \ + PTREGS_OFFSET_REG(5) - PTREGS_OFFSET_SYSCALL + .else + + push_reg r30, r52, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(30) + wh64 r52 /* cache line 3 */ + push_reg r29, r52 + push_reg r28, r52 + push_reg r27, r52 + push_reg r26, r52 + push_reg r25, r52 + push_reg r24, r52 + push_reg r23, r52 + push_reg r22, r52 + wh64 r52 /* cache line 2 */ + push_reg r21, r52 + push_reg r20, r52 + push_reg r19, r52 + push_reg r18, r52 + push_reg r17, r52 + push_reg r16, r52 + push_reg r15, r52 + push_reg r14, r52 + wh64 r52 /* cache line 1 */ + push_reg r13, r52 + push_reg r12, r52 + push_reg r11, r52 + push_reg r10, r52 + push_reg r9, r52 + push_reg r8, r52 + push_reg r7, r52 + push_reg r6, r52 + + .endif + + push_reg r5, r52 + st r52, r4 + + /* + * If we will be returning to the kernel, we will need to + * reset the interrupt masks to the state they had before. + * Set DISABLE_IRQ in flags iff we came from kernel pl with + * irqs disabled. + * + * r32 holds EX_CONTEXT_K_1; reduce it to the kernel-PL test in + * place so the branch below sees zero for user space whether or + * not ICS was set, and no other register is disturbed. + */ + mfspr r32, SPR_EX_CONTEXT_K_1 + { + IS_KERNEL_EX1(r32, r32) + PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) + } + beqzt r32, 1f /* zero if from user space */ + IRQS_DISABLED(r32) /* zero if irqs enabled */ +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Value of IRQS_DISABLED used to set PT_FLAGS_DISABLE_IRQ; fix +#endif +1: + .ifnc \function,handle_syscall + /* Record the fact that we saved the caller-save registers above. */ + ori r32, r32, PT_FLAGS_CALLER_SAVES + .endif + st r21, r32 + + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value.
This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + * + * It's important that no code before this point touch memory + * other than our own stack (to keep the invariant that this + * is all that gets touched under ICS), and that no code after + * this point reference any interrupt-specific SPR, in particular + * the EX_CONTEXT_K_ values. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, hw2_last(__per_cpu_offset) + } + { + shl16insli r21, r21, hw1(__per_cpu_offset) + bfextu r20, r20, CPU_SHIFT, 63 + } + shl16insli r21, r21, hw0(__per_cpu_offset) + shl3add r20, r20, r21 + ld tp, r20 +#else + move tp, zero +#endif + +#ifdef __COLLECT_LINKER_FEEDBACK__ + /* + * Notify the feedback routines that we were in the + * appropriate fixed interrupt vector area. Note that we + * still have ICS set at this point, so we can't invoke any + * atomic operations or we will panic. The feedback + * routines internally preserve r0..r10 and r30 up. + */ + .ifnc \function,handle_syscall + shli r20, r1, 5 + .else + moveli r20, INT_SWINT_1 << 5 + .endif + moveli r21, hw2_last(intvec_feedback) + shl16insli r21, r21, hw1(intvec_feedback) + shl16insli r21, r21, hw0(intvec_feedback) + add r20, r20, r21 + jalr r20 + + /* And now notify the feedback routines that we are here. */ + FEEDBACK_ENTER(\function) +#endif + + /* + * Prepare the first 256 stack bytes to be rapidly accessible + * without having to fetch the background data. 
+ */ + addi r52, sp, -64 + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + { + wh64 r52 + addi r52, r52, -64 + } + wh64 r52 + +#ifdef CONFIG_TRACE_IRQFLAGS + .ifnc \function,handle_nmi + /* + * We finally have enough state set up to notify the irq + * tracing code that irqs were disabled on entry to the handler. + * The TRACE_IRQS_OFF call clobbers registers r0-r29. + * For syscalls, we already have the register state saved away + * on the stack, so we don't bother to do any register saves here, + * and later we pop the registers back off the kernel stack. + * For interrupt handlers, save r0-r3 in callee-saved registers. + */ + .ifnc \function,handle_syscall + { move r30, r0; move r31, r1 } + { move r32, r2; move r33, r3 } + .endif + TRACE_IRQS_OFF + .ifnc \function,handle_syscall + { move r0, r30; move r1, r31 } + { move r2, r32; move r3, r33 } + .endif + .endif +#endif + + .endm + + /* + * Redispatch a downcall. + */ + .macro dc_dispatch vecnum, vecname + .org (\vecnum << 8) +intvec_\vecname: + j _hv_downcall_dispatch + ENDPROC(intvec_\vecname) + .endm + + /* + * Common code for most interrupts. The C function we're eventually + * going to is in r0, and the faultnum is in r1; the original + * values for those registers are on the stack. + */ + .pushsection .text.handle_interrupt,"ax" +handle_interrupt: + finish_interrupt_save handle_interrupt + + /* Jump to the C routine; it should enable irqs as soon as possible. */ + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_interrupt) + { + movei r30, 0 /* not an NMI */ + j interrupt_return + } + STD_ENDPROC(handle_interrupt) + +/* + * This routine takes a boolean in r30 indicating if this is an NMI. + * If so, we also expect a boolean in r31 indicating whether to + * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. 
+ */ +STD_ENTRY(interrupt_return) + /* If we're resuming to kernel space, don't check thread flags. */ + { + bnez r30, .Lrestore_all /* NMIs don't special-case user-space */ + PTREGS_PTR(r29, PTREGS_OFFSET_EX1) + } + ld r29, r29 + IS_KERNEL_EX1(r29, r29) + { + beqzt r29, .Lresume_userspace + move r29, sp + } + +#ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption. */ + EXTRACT_THREAD_INFO(r29) + addli r28, r29, THREAD_INFO_FLAGS_OFFSET + { + ld r28, r28 + addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET + } + { + andi r28, r28, _TIF_NEED_RESCHED + ld4s r29, r29 + } + beqzt r28, 1f + bnez r29, 1f + /* Disable interrupts explicitly for preemption. */ + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + jal preempt_schedule_irq + FEEDBACK_REENTER(interrupt_return) +1: +#endif + + /* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */ + { + moveli r27, hw2_last(_cpu_idle_nap) + PTREGS_PTR(r29, PTREGS_OFFSET_PC) + } + { + ld r28, r29 + shl16insli r27, r27, hw1(_cpu_idle_nap) + } + { + shl16insli r27, r27, hw0(_cpu_idle_nap) + } + { + cmpeq r27, r27, r28 + } + { + blbc r27, .Lrestore_all + addi r28, r28, 8 + } + st r29, r28 + j .Lrestore_all + +.Lresume_userspace: + FEEDBACK_REENTER(interrupt_return) + + /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + */ + { + movei r33, 0 + move r32, sp + } + + /* Get base of stack in r32. */ + EXTRACT_THREAD_INFO(r32) + +.Lretry_work_pending: + /* + * Disable interrupts so as to make sure we don't + * miss an interrupt that sets any of the thread flags (like + * need_resched or sigpending) between sampling and the iret. + * Routines like schedule() or do_signal() may re-enable + * interrupts before returning. 
+ */ + IRQ_DISABLE(r20, r21) + TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ + + + /* Check to see if there is any work to do before returning to user. */ + { + addi r29, r32, THREAD_INFO_FLAGS_OFFSET + moveli r1, hw1_last(_TIF_ALLWORK_MASK) + } + { + ld r29, r29 + shl16insli r1, r1, hw0(_TIF_ALLWORK_MASK) + } + and r1, r29, r1 + beqzt r1, .Lrestore_all + + /* + * Make sure we have all the registers saved for signal + * handling or notify-resume. Call out to C code to figure out + * exactly what we need to do for each flag bit, then if + * necessary, reload the flags and recheck. + */ + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + bnez r33, 1f + } + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnez r0, .Lretry_work_pending + + /* + * In the NMI case we + * omit the call to single_process_check_nohz, which normally checks + * to see if we should start or stop the scheduler tick, because + * we can't call arbitrary Linux code from an NMI context. + * We always call the homecache TLB deferral code to re-trigger + * the deferral mechanism. + * + * The other chunk of responsibility this code has is to reset the + * interrupt masks appropriately to reset irqs and NMIs. We have + * to call TRACE_IRQS_OFF and TRACE_IRQS_ON to support all the + * lockdep-type stuff, but we can't set ICS until afterwards, since + * ICS can only be used in very tight chunks of code to avoid + * tripping over various assertions that it is off. 
+ */ +.Lrestore_all: + PTREGS_PTR(r0, PTREGS_OFFSET_EX1) + { + ld r0, r0 + PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS) + } + { + IS_KERNEL_EX1(r0, r0) + ld r32, r32 + } + bnez r0, 1f + j 2f +#if PT_FLAGS_DISABLE_IRQ != 1 +# error Assuming PT_FLAGS_DISABLE_IRQ == 1 so we can use blbct below +#endif +1: blbct r32, 2f + IRQ_DISABLE(r20,r21) + TRACE_IRQS_OFF + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + beqzt r30, .Lrestore_regs + j 3f +2: TRACE_IRQS_ON + IRQ_ENABLE_LOAD(r20, r21) + movei r0, 1 + mtspr INTERRUPT_CRITICAL_SECTION, r0 + IRQ_ENABLE_APPLY(r20, r21) + beqzt r30, .Lrestore_regs +3: + +#if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT +# error Bad interrupt assumption +#endif + { + movei r0, 3 /* two adjacent bits for the PERF_COUNT mask */ + beqz r31, .Lrestore_regs + } + shli r0, r0, INT_PERF_COUNT + mtspr SPR_INTERRUPT_MASK_RESET_K, r0 + + /* + * We now commit to returning from this interrupt, since we will be + * doing things like setting EX_CONTEXT SPRs and unwinding the stack + * frame. No calls should be made to any other code after this point. + * This code should only be entered with ICS set. + * r32 must still be set to ptregs.flags. + * We launch loads to each cache line separately first, so we can + * get some parallelism out of the memory subsystem. + * We start zeroing caller-saved registers throughout, since + * that will save some cycles if this turns out to be a syscall. + */ +.Lrestore_regs: + + /* + * Rotate so we have one high bit and one low bit to test. + * - low bit says whether to restore all the callee-saved registers, + * or just r30-r33, and r52 up. + * - high bit (i.e. sign bit) says whether to restore all the + * caller-saved registers, or just r0. 
+ */ +#if PT_FLAGS_CALLER_SAVES != 2 || PT_FLAGS_RESTORE_REGS != 4 +# error Rotate trick does not work :-) +#endif + { + rotli r20, r32, 62 + PTREGS_PTR(sp, PTREGS_OFFSET_REG(0)) + } + + /* + * Load cache lines 0, 4, 6 and 7, in that order, then use + * the last loaded value, which makes it likely that the other + * cache lines have also loaded, at which point we should be + * able to safely read all the remaining words on those cache + * lines without waiting for the memory subsystem. + */ + pop_reg r0, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg r30, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_REG(30) + pop_reg_zero r52, r3, sp, PTREGS_OFFSET_CMPEXCH - PTREGS_OFFSET_REG(52) + pop_reg_zero r21, r27, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_CMPEXCH + pop_reg_zero lr, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_EX1 + { + mtspr CMPEXCH_VALUE, r21 + move r4, zero + } + pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC + { + mtspr SPR_EX_CONTEXT_K_1, lr + IS_KERNEL_EX1(lr, lr) + } + { + mtspr SPR_EX_CONTEXT_K_0, r21 + move r5, zero + } + + /* Restore callee-saveds that we actually use. */ + pop_reg_zero r31, r6 + pop_reg_zero r32, r7 + pop_reg_zero r33, r8, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(33) + + /* + * If we modified other callee-saveds, restore them now. + * This is rare, but could be via ptrace or signal handler. + */ + { + move r9, zero + blbs r20, .Lrestore_callees + } +.Lcontinue_restore_regs: + + /* Check if we're returning from a syscall. */ + { + move r10, zero + bltzt r20, 1f /* no, so go restore caller-save registers */ + } + + /* + * Check if we're returning to userspace. + * Note that if we're not, we don't worry about zeroing everything. + */ + { + addli sp, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(29) + bnez lr, .Lkernel_return + } + + /* + * On return from syscall, we've restored r0 from pt_regs, but we + * clear the remainder of the caller-saved registers. 
We could + * restore the syscall arguments, but there's not much point, + * and it ensures user programs aren't trying to use the + * caller-saves if we clear them, as well as avoiding leaking + * kernel pointers into userspace. + */ + pop_reg_zero lr, r11, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg_zero tp, r12, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + { + ld sp, sp + move r13, zero + move r14, zero + } + { move r15, zero; move r16, zero } + { move r17, zero; move r18, zero } + { move r19, zero; move r20, zero } + { move r21, zero; move r22, zero } + { move r23, zero; move r24, zero } + { move r25, zero; move r26, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 4096 + sub r1, zero, r0 + } + { + move r28, zero + cmpltu r29, r1, r29 + } + { + mnz r1, r29, r1 + move r29, zero + } + iret + + /* + * Not a syscall, so restore caller-saved registers. + * First kick off loads for cache lines 1-3, which we're touching + * for the first time here. 
+ */ + .align 64 +1: pop_reg r29, sp, PTREGS_OFFSET_REG(21) - PTREGS_OFFSET_REG(29) + pop_reg r21, sp, PTREGS_OFFSET_REG(13) - PTREGS_OFFSET_REG(21) + pop_reg r13, sp, PTREGS_OFFSET_REG(1) - PTREGS_OFFSET_REG(13) + pop_reg r1 + pop_reg r2 + pop_reg r3 + pop_reg r4 + pop_reg r5 + pop_reg r6 + pop_reg r7 + pop_reg r8 + pop_reg r9 + pop_reg r10 + pop_reg r11 + pop_reg r12, sp, 16 + /* r13 already restored above */ + pop_reg r14 + pop_reg r15 + pop_reg r16 + pop_reg r17 + pop_reg r18 + pop_reg r19 + pop_reg r20, sp, 16 + /* r21 already restored above */ + pop_reg r22 + pop_reg r23 + pop_reg r24 + pop_reg r25 + pop_reg r26 + pop_reg r27 + pop_reg r28, sp, PTREGS_OFFSET_LR - PTREGS_OFFSET_REG(28) + /* r29 already restored above */ + bnez lr, .Lkernel_return + pop_reg lr, sp, PTREGS_OFFSET_TP - PTREGS_OFFSET_LR + pop_reg tp, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_TP + ld sp, sp + iret + + /* + * We can't restore tp when in kernel mode, since a thread might + * have migrated from another cpu and brought a stale tp value. + */ +.Lkernel_return: + pop_reg lr, sp, PTREGS_OFFSET_SP - PTREGS_OFFSET_LR + ld sp, sp + iret + + /* Restore callee-saved registers from r34 to r51. */ +.Lrestore_callees: + addli sp, sp, PTREGS_OFFSET_REG(34) - PTREGS_OFFSET_REG(29) + pop_reg r34 + pop_reg r35 + pop_reg r36 + pop_reg r37 + pop_reg r38 + pop_reg r39 + pop_reg r40 + pop_reg r41 + pop_reg r42 + pop_reg r43 + pop_reg r44 + pop_reg r45 + pop_reg r46 + pop_reg r47 + pop_reg r48 + pop_reg r49 + pop_reg r50 + pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51) + j .Lcontinue_restore_regs + STD_ENDPROC(interrupt_return) + + /* + * "NMI" interrupts mask ALL interrupts before calling the + * handler, and don't check thread flags, etc., on the way + * back out. In general, the only things we do here for NMIs + * are register save/restore and dataplane kernel-TLB management. + * We don't (for example) deal with start/stop of the sched tick. 
+ */ + .pushsection .text.handle_nmi,"ax" +handle_nmi: + finish_interrupt_save handle_nmi + { + jalr r0 + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + } + FEEDBACK_REENTER(handle_nmi) + { + movei r30, 1 + cmpeq r31, r0, zero + } + j interrupt_return + STD_ENDPROC(handle_nmi) + + /* + * Parallel code for syscalls to handle_interrupt. + */ + .pushsection .text.handle_syscall,"ax" +handle_syscall: + finish_interrupt_save handle_syscall + + /* Enable irqs. */ + TRACE_IRQS_ON + IRQ_ENABLE(r20, r21) + + /* Bump the counter for syscalls made on this tile. */ + moveli r20, hw2_last(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw1(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) + add r20, r20, tp + ld4s r21, r20 + { + addi r21, r21, 1 + move r31, sp + } + { + st4 r20, r21 + EXTRACT_THREAD_INFO(r31) + } + + /* Trace syscalls, if requested. */ + addi r31, r31, THREAD_INFO_FLAGS_OFFSET + { + ld r30, r31 + moveli r32, _TIF_SYSCALL_ENTRY_WORK + } + and r30, r30, r32 + { + addi r30, r31, THREAD_INFO_STATUS_OFFSET - THREAD_INFO_FLAGS_OFFSET + beqzt r30, .Lrestore_syscall_regs + } + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_enter + } + FEEDBACK_REENTER(handle_syscall) + + /* + * We always reload our registers from the stack at this + * point. They might be valid, if we didn't build with + * TRACE_IRQFLAGS, and this isn't a dataplane tile, and we're not + * doing syscall tracing, but there are enough cases now that it + * seems simplest just to do the reload unconditionally. + */ +.Lrestore_syscall_regs: + { + ld r30, r30 + PTREGS_PTR(r11, PTREGS_OFFSET_REG(0)) + } + pop_reg r0, r11 + pop_reg r1, r11 + pop_reg r2, r11 + pop_reg r3, r11 + pop_reg r4, r11 + pop_reg r5, r11, PTREGS_OFFSET_SYSCALL - PTREGS_OFFSET_REG(5) + { + ld TREG_SYSCALL_NR_NAME, r11 + moveli r21, __NR_syscalls + } + + /* Ensure that the syscall number is within the legal range. 
*/ + { + moveli r20, hw2(sys_call_table) +#ifdef CONFIG_COMPAT + blbs r30, .Lcompat_syscall +#endif + } + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(sys_call_table) + } +.Lload_syscall_pointer: + shl3add r20, TREG_SYSCALL_NR_NAME, r20 + ld r20, r20 + + /* Jump to syscall handler. */ + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ + + /* + * Write our r0 onto the stack so it gets restored instead + * of whatever the user had there before. + * In compat mode, sign-extend r0 before storing it. + */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + blbct r30, 1f + } + addxi r0, r0, 0 +1: st r29, r0 + +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + + /* Do syscall trace again, if requested. */ + { + ld r30, r31 + moveli r32, _TIF_SYSCALL_EXIT_WORK + } + and r0, r30, r32 + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } + { + PTREGS_PTR(r0, PTREGS_OFFSET_BASE) + jal do_syscall_trace_exit + } + FEEDBACK_REENTER(handle_syscall) + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + +#ifdef CONFIG_COMPAT +.Lcompat_syscall: + /* + * Load the base of the compat syscall table in r20, and + * range-check the syscall number (duplicated from 64-bit path). + * Sign-extend all the user's passed arguments to make them consistent. + * Also save the original "r(n)" values away in "r(11+n)" in + * case the syscall table entry wants to validate them. 
+ */ + moveli r20, hw2(compat_sys_call_table) + { + cmpltu r21, TREG_SYSCALL_NR_NAME, r21 + shl16insli r20, r20, hw1(compat_sys_call_table) + } + { + blbc r21, .Linvalid_syscall + shl16insli r20, r20, hw0(compat_sys_call_table) + } + { move r11, r0; addxi r0, r0, 0 } + { move r12, r1; addxi r1, r1, 0 } + { move r13, r2; addxi r2, r2, 0 } + { move r14, r3; addxi r3, r3, 0 } + { move r15, r4; addxi r4, r4, 0 } + { move r16, r5; addxi r5, r5, 0 } + j .Lload_syscall_pointer +#endif + +.Linvalid_syscall: + /* Report an invalid syscall back to the user program */ + { + PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) + movei r28, -ENOSYS + } + st r29, r28 + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(handle_syscall) + + /* Return the address for oprofile to suppress in backtraces. */ +STD_ENTRY_SECTION(handle_syscall_link_address, .text.handle_syscall) + lnk r0 + { + addli r0, r0, .Lhandle_syscall_link - . + jrp lr + } + STD_ENDPROC(handle_syscall_link_address) + +STD_ENTRY(ret_from_fork) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_fork) + +STD_ENTRY(ret_from_kernel_thread) + jal sim_notify_fork + jal schedule_tail + FEEDBACK_REENTER(ret_from_fork) + { + move r0, r31 + jalr r30 + } + FEEDBACK_REENTER(ret_from_kernel_thread) + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } + STD_ENDPROC(ret_from_kernel_thread) + +/* Various stub interrupt handlers and syscall handlers */ + +STD_ENTRY_LOCAL(_kernel_double_fault) + mfspr r1, SPR_EX_CONTEXT_K_0 + move r2, lr + move r3, sp + move r4, r52 + addi sp, sp, -C_ABI_SAVE_AREA_SIZE + j kernel_double_fault + STD_ENDPROC(_kernel_double_fault) + +STD_ENTRY_LOCAL(bad_intr) + mfspr r2, SPR_EX_CONTEXT_K_0 + panic "Unhandled interrupt %#x: PC %#lx" + 
STD_ENDPROC(bad_intr) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +#ifdef CONFIG_COMPAT +PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0) +#endif + +/* Save additional callee-saves to pt_regs and jump to standard function. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) + + /* + * Recover r3, r2, r1 and r0 here saved by unalign fast vector. + * The vector area limit is 32 bundles, so we handle the reload here. + * r0, r1, r2 are in thread_info from low to high memory in order. + * r3 points to location the original r3 was saved. + * We put this code in the __HEAD section so it can be reached + * via a conditional branch from the fast path. + */ + __HEAD +hand_unalign_slow: + andi sp, sp, ~1 +hand_unalign_slow_badsp: + addi r3, r3, -(3 * 8) + ld_add r0, r3, 8 + ld_add r1, r3, 8 + ld r2, r3 +hand_unalign_slow_nonuser: + mfspr r3, SPR_SYSTEM_SAVE_K_1 + __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign + +/* The unaligned data support needs to read all the registers. */ +int_unalign: + push_extra_callee_saves r0 + j do_unaligned +ENDPROC(hand_unalign_slow) + +/* Fill the return address stack with nonzero entries. 
*/ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + + .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt + .org (\vecnum << 8) + __int_hand \vecnum, \vecname, \c_routine, \processing + .endm + +/* Include .intrpt array of interrupt vectors */ + .section ".intrpt", "ax" + .global intrpt_start +intrpt_start: + +#ifndef CONFIG_USE_PMC +#define handle_perf_interrupt bad_intr +#endif + +#ifndef CONFIG_HARDWALL +#define do_hardwall_trap bad_intr +#endif + + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap + int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, bad_intr +#else + int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, bad_intr + int_hand INT_SINGLE_STEP_1, SINGLE_STEP_1, gx_singlestep_handle +#endif + int_hand INT_SINGLE_STEP_0, SINGLE_STEP_0, bad_intr + int_hand INT_IDN_COMPLETE, IDN_COMPLETE, bad_intr + int_hand INT_UDN_COMPLETE, UDN_COMPLETE, bad_intr + int_hand INT_ITLB_MISS, ITLB_MISS, do_page_fault + int_hand INT_ILL, ILL, do_trap + int_hand INT_GPV, GPV, do_trap + int_hand INT_IDN_ACCESS, IDN_ACCESS, do_trap + int_hand INT_UDN_ACCESS, UDN_ACCESS, do_trap + int_hand INT_SWINT_3, SWINT_3, do_trap + int_hand INT_SWINT_2, SWINT_2, do_trap + int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall + int_hand INT_SWINT_0, SWINT_0, do_trap + int_hand INT_ILL_TRANS, ILL_TRANS, do_trap + int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA + int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault + int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap + int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap + int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt + int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr + int_hand INT_UDN_TIMER, UDN_TIMER, bad_intr + int_hand INT_IDN_AVAIL, 
IDN_AVAIL, bad_intr + int_hand INT_UDN_AVAIL, UDN_AVAIL, bad_intr + int_hand INT_IPI_3, IPI_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + int_hand INT_IPI_2, IPI_2, tile_dev_intr + int_hand INT_IPI_1, IPI_1, bad_intr +#else + int_hand INT_IPI_2, IPI_2, bad_intr + int_hand INT_IPI_1, IPI_1, tile_dev_intr +#endif + int_hand INT_IPI_0, IPI_0, bad_intr + int_hand INT_PERF_COUNT, PERF_COUNT, \ + handle_perf_interrupt, handle_nmi + int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ + handle_perf_interrupt, handle_nmi + int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else + int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr + dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif + int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr + int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ + hv_message_intr + int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, bad_intr + int_hand INT_I_ASID, I_ASID, bad_intr + int_hand INT_D_ASID, D_ASID, bad_intr + int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap + + /* Synthetic interrupt delivered only by the simulator */ + int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index aa0134db2dd..637f2ffaa5f 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -21,6 +21,7 @@ #include <hv/drv_pcie_rc_intf.h> #include <arch/spr_def.h> #include <asm/traps.h> +#include <linux/perf_event.h> /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */ #define IS_HW_CLEARED 1 @@ -53,12 +54,6 @@ static DEFINE_PER_CPU(unsigned long, irq_disable_mask) */ static DEFINE_PER_CPU(int, irq_depth); -/* State for allocating IRQs on Gx. */ -#if CHIP_HAS_IPI() -static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE); -static DEFINE_SPINLOCK(available_irqs_lock); -#endif - #if CHIP_HAS_IPI() /* Use SPRs to manipulate device interrupts. 
*/ #define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask) @@ -73,7 +68,8 @@ static DEFINE_SPINLOCK(available_irqs_lock); /* * The interrupt handling path, implemented in terms of HV interrupt - * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. + * emulation on TILEPro, and IPI hardware on TILE-Gx. + * Entered with interrupts disabled. */ void tile_dev_intr(struct pt_regs *regs, int intnum) { @@ -152,14 +148,13 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) * Remove an irq from the disabled mask. If we're in an interrupt * context, defer enabling the HW interrupt until we leave. */ -void enable_percpu_irq(unsigned int irq) +static void tile_irq_chip_enable(struct irq_data *d) { - get_cpu_var(irq_disable_mask) &= ~(1UL << irq); + get_cpu_var(irq_disable_mask) &= ~(1UL << d->irq); if (__get_cpu_var(irq_depth) == 0) - unmask_irqs(1UL << irq); + unmask_irqs(1UL << d->irq); put_cpu_var(irq_disable_mask); } -EXPORT_SYMBOL(enable_percpu_irq); /* * Add an irq to the disabled mask. We disable the HW interrupt @@ -167,13 +162,12 @@ EXPORT_SYMBOL(enable_percpu_irq); * in an interrupt context, the return path is careful to avoid * unmasking a newly disabled interrupt. */ -void disable_percpu_irq(unsigned int irq) +static void tile_irq_chip_disable(struct irq_data *d) { - get_cpu_var(irq_disable_mask) |= (1UL << irq); - mask_irqs(1UL << irq); + get_cpu_var(irq_disable_mask) |= (1UL << d->irq); + mask_irqs(1UL << d->irq); put_cpu_var(irq_disable_mask); } -EXPORT_SYMBOL(disable_percpu_irq); /* Mask an interrupt. 
*/ static void tile_irq_chip_mask(struct irq_data *d) @@ -209,6 +203,8 @@ static void tile_irq_chip_eoi(struct irq_data *d) static struct irq_chip tile_irq_chip = { .name = "tile_irq_chip", + .irq_enable = tile_irq_chip_enable, + .irq_disable = tile_irq_chip_disable, .irq_ack = tile_irq_chip_ack, .irq_eoi = tile_irq_chip_eoi, .irq_mask = tile_irq_chip_mask, @@ -220,7 +216,7 @@ void __init init_IRQ(void) ipi_init(); } -void __cpuinit setup_irq_regs(void) +void setup_irq_regs(void) { /* Enable interrupt delivery. */ unmask_irqs(~0UL); @@ -233,7 +229,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type) { /* * We use handle_level_irq() by default because the pending - * interrupt vector (whether modeled by the HV on TILE64 and + * interrupt vector (whether modeled by the HV on * TILEPro or implemented in hardware on TILE-Gx) has * level-style semantics for each bit. An interrupt fires * whenever a bit is high, not just at edges. @@ -259,37 +255,27 @@ void ack_bad_irq(unsigned int irq) } /* - * Generic, controller-independent functions: + * /proc/interrupts printing: */ - -#if CHIP_HAS_IPI() -int create_irq(void) +int arch_show_interrupts(struct seq_file *p, int prec) { - unsigned long flags; - int result; - - spin_lock_irqsave(&available_irqs_lock, flags); - if (available_irqs == 0) - result = -ENOMEM; - else { - result = __ffs(available_irqs); - available_irqs &= ~(1UL << result); - dynamic_irq_init(result); - } - spin_unlock_irqrestore(&available_irqs_lock, flags); +#ifdef CONFIG_PERF_EVENTS + int i; + + seq_printf(p, "%*s: ", prec, "PMI"); - return result; + for_each_online_cpu(i) + seq_printf(p, "%10llu ", per_cpu(perf_irqs, i)); + seq_puts(p, " perf_events\n"); +#endif + return 0; } -EXPORT_SYMBOL(create_irq); -void destroy_irq(unsigned int irq) +#if CHIP_HAS_IPI() +int arch_setup_hwirq(unsigned int irq, int node) { - unsigned long flags; - - spin_lock_irqsave(&available_irqs_lock, flags); - available_irqs |= (1UL << irq); - dynamic_irq_cleanup(irq); - 
spin_unlock_irqrestore(&available_irqs_lock, flags); + return irq >= NR_IRQS ? -EINVAL : 0; } -EXPORT_SYMBOL(destroy_irq); + +void arch_teardown_hwirq(unsigned int irq) { } #endif diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c new file mode 100644 index 00000000000..4cd88381a83 --- /dev/null +++ b/arch/tile/kernel/kgdb.c @@ -0,0 +1,499 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx KGDB support. + */ + +#include <linux/ptrace.h> +#include <linux/kgdb.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <asm/cacheflush.h> + +static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; +static unsigned long stepped_addr; +static tile_bundle_bits stepped_instr; + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0])}, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1])}, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2])}, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3])}, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4])}, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5])}, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6])}, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7])}, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8])}, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9])}, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10])}, 
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11])}, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12])}, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13])}, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14])}, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15])}, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16])}, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17])}, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18])}, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19])}, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20])}, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21])}, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22])}, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23])}, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24])}, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25])}, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26])}, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27])}, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28])}, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29])}, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30])}, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31])}, + { "r32", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[32])}, + { "r33", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[33])}, + { "r34", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[34])}, + { "r35", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[35])}, + { "r36", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[36])}, + { "r37", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[37])}, + { "r38", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[38])}, + { "r39", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[39])}, + { "r40", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[40])}, + { "r41", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[41])}, + { "r42", 
GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[42])}, + { "r43", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[43])}, + { "r44", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[44])}, + { "r45", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[45])}, + { "r46", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[46])}, + { "r47", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[47])}, + { "r48", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[48])}, + { "r49", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[49])}, + { "r50", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[50])}, + { "r51", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[51])}, + { "r52", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[52])}, + { "tp", GDB_SIZEOF_REG, offsetof(struct pt_regs, tp)}, + { "sp", GDB_SIZEOF_REG, offsetof(struct pt_regs, sp)}, + { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, lr)}, + { "sn", GDB_SIZEOF_REG, -1}, + { "idn0", GDB_SIZEOF_REG, -1}, + { "idn1", GDB_SIZEOF_REG, -1}, + { "udn0", GDB_SIZEOF_REG, -1}, + { "udn1", GDB_SIZEOF_REG, -1}, + { "udn2", GDB_SIZEOF_REG, -1}, + { "udn3", GDB_SIZEOF_REG, -1}, + { "zero", GDB_SIZEOF_REG, -1}, + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, pc)}, + { "faultnum", GDB_SIZEOF_REG, offsetof(struct pt_regs, faultnum)}, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +/* + * Similar to pt_regs_to_gdb_regs() except that process is sleeping and so + * we may not be able to get all the info. 
+ */ +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + int reg; + struct pt_regs *thread_regs; + unsigned long *ptr = gdb_regs; + + if (task == NULL) + return; + + /* Initialize to zero. */ + memset(gdb_regs, 0, NUMREGBYTES); + + thread_regs = task_pt_regs(task); + for (reg = 0; reg <= TREG_LAST_GPR; reg++) + *(ptr++) = thread_regs->regs[reg]; + + gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc; + gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), NULL); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +/* + * Convert a kernel address to the writable kernel text mapping. + */ +static unsigned long writable_address(unsigned long addr) +{ + unsigned long ret = 0; + + if (core_kernel_text(addr)) + ret = addr - MEM_SV_START + PAGE_OFFSET; + else if (is_module_text_address(addr)) + ret = addr; + else + pr_err("Unknown virtual address 0x%lx\n", addr); + + return ret; +} + +/* + * Calculate the new address for after a step. + */ +static unsigned long get_step_address(struct pt_regs *regs) +{ + int src_reg; + int jump_off; + int br_off; + unsigned long addr; + unsigned int opcode; + tile_bundle_bits bundle; + + /* Move to the next instruction by default. */ + addr = regs->pc + TILEGX_BUNDLE_SIZE_IN_BYTES; + bundle = *(unsigned long *)instruction_pointer(regs); + + /* 0: X mode, Otherwise: Y mode. 
*/ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 && + get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) { + opcode = get_UnaryOpcodeExtension_Y1(bundle); + + switch (opcode) { + case JALR_UNARY_OPCODE_Y1: + case JALRP_UNARY_OPCODE_Y1: + case JR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + src_reg = get_SrcA_Y1(bundle); + dbg_get_reg(src_reg, &addr, regs); + break; + } + } + } else if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) { + if (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) { + opcode = get_UnaryOpcodeExtension_X1(bundle); + + switch (opcode) { + case JALR_UNARY_OPCODE_X1: + case JALRP_UNARY_OPCODE_X1: + case JR_UNARY_OPCODE_X1: + case JRP_UNARY_OPCODE_X1: + src_reg = get_SrcA_X1(bundle); + dbg_get_reg(src_reg, &addr, regs); + break; + } + } + } else if (get_Opcode_X1(bundle) == JUMP_OPCODE_X1) { + opcode = get_JumpOpcodeExtension_X1(bundle); + + switch (opcode) { + case JAL_JUMP_OPCODE_X1: + case J_JUMP_OPCODE_X1: + jump_off = sign_extend(get_JumpOff_X1(bundle), 27); + addr = regs->pc + + (jump_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES); + break; + } + } else if (get_Opcode_X1(bundle) == BRANCH_OPCODE_X1) { + br_off = 0; + opcode = get_BrType_X1(bundle); + + switch (opcode) { + case BEQZT_BRANCH_OPCODE_X1: + case BEQZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) == 0) + br_off = get_BrOff_X1(bundle); + break; + case BGEZT_BRANCH_OPCODE_X1: + case BGEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) >= 0) + br_off = get_BrOff_X1(bundle); + break; + case BGTZT_BRANCH_OPCODE_X1: + case BGTZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) > 0) + br_off = get_BrOff_X1(bundle); + break; + case BLBCT_BRANCH_OPCODE_X1: + case BLBC_BRANCH_OPCODE_X1: + if (!(get_SrcA_X1(bundle) & 1)) + br_off = get_BrOff_X1(bundle); + break; + case BLBST_BRANCH_OPCODE_X1: + case BLBS_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) & 1) + br_off = get_BrOff_X1(bundle); + break; + case BLEZT_BRANCH_OPCODE_X1: + case 
BLEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) <= 0) + br_off = get_BrOff_X1(bundle); + break; + case BLTZT_BRANCH_OPCODE_X1: + case BLTZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) < 0) + br_off = get_BrOff_X1(bundle); + break; + case BNEZT_BRANCH_OPCODE_X1: + case BNEZ_BRANCH_OPCODE_X1: + if (get_SrcA_X1(bundle) != 0) + br_off = get_BrOff_X1(bundle); + break; + } + + if (br_off != 0) { + br_off = sign_extend(br_off, 17); + addr = regs->pc + + (br_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES); + } + } + + return addr; +} + +/* + * Replace the next instruction after the current instruction with a + * breakpoint instruction. + */ +static void do_single_step(struct pt_regs *regs) +{ + unsigned long addr_wr; + + /* Determine where the target instruction will send us to. */ + stepped_addr = get_step_address(regs); + probe_kernel_read((char *)&stepped_instr, (char *)stepped_addr, + BREAK_INSTR_SIZE); + + addr_wr = writable_address(stepped_addr); + probe_kernel_write((char *)addr_wr, (char *)&singlestep_insn, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE); +} + +static void undo_single_step(struct pt_regs *regs) +{ + unsigned long addr_wr; + + if (stepped_instr == 0) + return; + + addr_wr = writable_address(stepped_addr); + probe_kernel_write((char *)addr_wr, (char *)&stepped_instr, + BREAK_INSTR_SIZE); + stepped_instr = 0; + smp_wmb(); + flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE); +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger. + */ +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + int ret; + unsigned long flags; + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. 
+ */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + + switch (cmd) { + case DIE_BREAK: + case DIE_COMPILED_BPT: + break; + case DIE_SSTEPBP: + local_irq_save(flags); + kgdb_handle_exception(0, SIGTRAP, 0, regs); + local_irq_restore(flags); + return NOTIFY_STOP; + default: + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + } + + local_irq_save(flags); + ret = kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); + local_irq_restore(flags); + if (ret) + return NOTIFY_DONE; + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, +}; + +/* + * kgdb_arch_handle_exception - Handle architecture specific GDB packets. + * @vector: The error vector of the exception that happened. + * @signo: The signal number of the exception that happened. + * @err_code: The error code of the exception that happened. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + * @regs: The &struct pt_regs of the current process. + * + * This function MUST handle the 'c' and 's' command packets, + * as well as packets to set / remove a hardware breakpoint, if used. + * If there are additional packets which the hardware needs to handle, + * they are handled here. The code should return -1 if it wants to + * process more packets, and a %0 or %1 if it wants to exit from the + * kgdb callback. + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + + /* Undo any stepping we may have done. */ + undo_single_step(regs); + + switch (remcom_in_buffer[0]) { + case 'c': + case 's': + case 'D': + case 'k': + /* + * Try to read optional parameter, pc unchanged if no parm. 
+ * If this was a compiled-in breakpoint, we need to move + * to the next instruction or we will just breakpoint + * over and over again. + */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->pc = address; + else if (*(unsigned long *)regs->pc == compiled_bpt) + regs->pc += BREAK_INSTR_SIZE; + + if (remcom_in_buffer[0] == 's') { + do_single_step(regs); + kgdb_single_step = 1; + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } else + atomic_set(&kgdb_cpu_doing_single_step, -1); + + return 0; + } + + return -1; /* this means that we do not want to exit from the handler */ +} + +struct kgdb_arch arch_kgdb_ops; + +/* + * kgdb_arch_init - Perform any architecture specific initialization. + * + * This function will handle the initialization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + tile_bundle_bits bundle = TILEGX_BPT_BUNDLE; + + memcpy(arch_kgdb_ops.gdb_bpt_instr, &bundle, BREAK_INSTR_SIZE); + return register_die_notifier(&kgdb_notifier); +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitialization. + * + * This function will handle the uninitialization of any architecture + * specific callbacks, for dynamic registration and unregistration. 
+ */ +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned long addr_wr = writable_address(bpt->bpt_addr); + + if (addr_wr == 0) + return -1; + + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + + err = probe_kernel_write((char *)addr_wr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range((unsigned long)bpt->bpt_addr, + (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE); + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned long addr_wr = writable_address(bpt->bpt_addr); + + if (addr_wr == 0) + return -1; + + err = probe_kernel_write((char *)addr_wr, (char *)bpt->saved_instr, + BREAK_INSTR_SIZE); + smp_wmb(); + flush_icache_range((unsigned long)bpt->bpt_addr, + (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE); + return err; +} diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c new file mode 100644 index 00000000000..27cdcacbe81 --- /dev/null +++ b/arch/tile/kernel/kprobes.c @@ -0,0 +1,528 @@ +/* + * arch/tile/kernel/kprobes.c + * Kprobes on TILE-Gx + * + * Some portions copied from the MIPS version. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright 2006 Sony Corp. + * Copyright 2010 Cavium Networks + * + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> + +#include <arch/opcode.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +tile_bundle_bits breakpoint_insn = TILEGX_BPT_BUNDLE; +tile_bundle_bits breakpoint2_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; + +/* + * Check whether instruction is branch or jump, or if executing it + * has different results depending on where it is executed (e.g. lnk). + */ +static int __kprobes insn_has_control(kprobe_opcode_t insn) +{ + if (get_Mode(insn) != 0) { /* Y-format bundle */ + if (get_Opcode_Y1(insn) != RRR_1_OPCODE_Y1 || + get_RRROpcodeExtension_Y1(insn) != UNARY_RRR_1_OPCODE_Y1) + return 0; + + switch (get_UnaryOpcodeExtension_Y1(insn)) { + case JALRP_UNARY_OPCODE_Y1: + case JALR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + case JR_UNARY_OPCODE_Y1: + case LNK_UNARY_OPCODE_Y1: + return 1; + default: + return 0; + } + } + + switch (get_Opcode_X1(insn)) { + case BRANCH_OPCODE_X1: /* branch instructions */ + case JUMP_OPCODE_X1: /* jump instructions: j and jal */ + return 1; + + case RRR_0_OPCODE_X1: /* other jump instructions */ + if (get_RRROpcodeExtension_X1(insn) != UNARY_RRR_0_OPCODE_X1) + return 0; + switch (get_UnaryOpcodeExtension_X1(insn)) { + case JALRP_UNARY_OPCODE_X1: + case JALR_UNARY_OPCODE_X1: + case JRP_UNARY_OPCODE_X1: + case JR_UNARY_OPCODE_X1: + case LNK_UNARY_OPCODE_X1: + return 1; + default: + return 0; + } + default: + return 0; + } +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (addr & (sizeof(kprobe_opcode_t) - 1)) + return -EINVAL; + + if (insn_has_control(*p->addr)) { + pr_notice("Kprobes for control instructions are not " + "supported\n"); + return -EINVAL; + } + + /* insn: must be on special executable page on tile. 
*/ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + + /* + * In the kprobe->ainsn.insn[] array we store the original + * instruction at index zero and a break trap instruction at + * index one. + */ + memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t)); + p->ainsn.insn[1] = breakpoint2_insn; + p->opcode = *p->addr; + + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr_wr; + + /* Operate on writable kernel text mapping. */ + addr_wr = (unsigned long)p->addr - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)addr_wr, &breakpoint_insn, + sizeof(breakpoint_insn))) + pr_err("%s: failed to enable kprobe\n", __func__); + + smp_wmb(); + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *kp) +{ + unsigned long addr_wr; + + /* Operate on writable kernel text mapping. */ + addr_wr = (unsigned long)kp->addr - MEM_SV_START + PAGE_OFFSET; + + if (probe_kernel_write((void *)addr_wr, &kp->opcode, + sizeof(kp->opcode))) + pr_err("%s: failed to disable kprobe\n", __func__); + + smp_wmb(); + flush_insn_slot(kp); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_pc = kcb->kprobe_saved_pc; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_pc = kcb->prev_kprobe.saved_pc; +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_pc = regs->pc; +} + +static void __kprobes prepare_singlestep(struct kprobe *p, struct 
pt_regs *regs) +{ + /* Single step inline if the instruction is a break. */ + if (p->opcode == breakpoint_insn || + p->opcode == breakpoint2_insn) + regs->pc = (unsigned long)p->addr; + else + regs->pc = (unsigned long)&p->ainsn.insn[0]; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + kprobe_opcode_t *addr; + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *)regs->pc; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing. + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Check we're not actually recursing. */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kcb->kprobe_status == KPROBE_HIT_SS && + p->ainsn.insn[0] == breakpoint_insn) { + goto no_kprobe; + } + /* + * We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kprobes_inc_nmissed_count(p); + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else { + if (*addr != breakpoint_insn) { + /* + * The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + goto no_kprobe; + } + p = __this_cpu_read(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) + goto ss_probe; + } + goto no_kprobe; + } + + p = get_kprobe(addr); + if (!p) { + if (*addr != breakpoint_insn) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + } + /* Not one of ours: let kernel handle it. 
*/ + goto no_kprobe; + } + + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + if (p->pre_handler && p->pre_handler(p, regs)) { + /* Handler has already set things up, so skip ss setup. */ + return 1; + } + +ss_probe: + prepare_singlestep(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction that has been replaced by the breakpoint. To avoid the + * SMP problems that can occur when we temporarily put back the + * original opcode to single-step, we single-stepped a copy of the + * instruction. The address of this copy is p->ainsn.insn. + * + * This function prepares to return from the post-single-step + * breakpoint trap. + */ +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + unsigned long orig_pc = kcb->kprobe_saved_pc; + regs->pc = orig_pc + 8; +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs, kcb); + + /* Restore back the original saved kprobes variables and continue. 
*/ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + resume_execution(cur, regs, kcb); + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BREAK: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEPBP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id(). */ + preempt_disable(); + + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_sp = regs->sp; + + memcpy(kcb->jprobes_stack, (void *)kcb->jprobe_saved_sp, + MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp)); + + regs->pc = (unsigned long)(jp->entry); + + return 1; +} + +/* Defined in the inline asm below. 
*/ +void jprobe_return_end(void); + +void __kprobes jprobe_return(void) +{ + asm volatile( + "bpt\n\t" + ".globl jprobe_return_end\n" + "jprobe_return_end:\n"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (regs->pc >= (unsigned long)jprobe_return && + regs->pc <= (unsigned long)jprobe_return_end) { + *regs = kcb->jprobe_saved_regs; + memcpy((void *)kcb->jprobe_saved_sp, kcb->jprobes_stack, + MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp)); + preempt_enable_no_resched(); + + return 1; + } + return 0; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe causes the + * handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile( + "nop\n\t" + ".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n\t" + "nop\n\t" + : : : "memory"); +} + +void kretprobe_trampoline(void); + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->lr; + + /* Replace the return addr with trampoline addr */ + regs->lr = (unsigned long)kretprobe_trampoline; +} + +/* + * Called when the probe at kretprobe trampoline is hit. + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = (unsigned long)kretprobe_trampoline; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * a return probe installed on them, and/or more than one + * return probe was registered for a target function. 
+ * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + if (ri->rp && ri->rp->handler) + ri->rp->handler(ri, regs); + + orig_ret_address = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) { + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + instruction_pointer(regs) = orig_ret_address; + + reset_current_kprobe(); + kretprobe_hash_unlock(current, &flags); + preempt_enable_no_resched(); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + return 1; + + return 0; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + /* Report a failed trampoline registration instead of discarding it. */ + return register_kprobe(&trampoline_p); +} diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index e00d7179989..f0b54a93471 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -31,6 +31,8 @@ #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/checksum.h> +#include <asm/tlbflush.h> 
+#include <asm/homecache.h> #include <hv/hypervisor.h> @@ -222,11 +224,22 @@ struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order) return alloc_pages_node(0, gfp_mask, order); } +/* + * Address range in which pa=va mapping is set in setup_quasi_va_is_pa(). + * For tilepro, PAGE_OFFSET is used since this is the largest possible value + * for tilepro, while for tilegx, we limit it to the entire middle level page + * table which we assume has been allocated and is undoubtedly large enough. + */ +#ifndef __tilegx__ +#define QUASI_VA_IS_PA_ADDR_RANGE PAGE_OFFSET +#else +#define QUASI_VA_IS_PA_ADDR_RANGE PGDIR_SIZE +#endif + static void setup_quasi_va_is_pa(void) { - HV_PTE *pgtable; HV_PTE pte; - int i; + unsigned long i; /* * Flush our TLB to prevent conflicts between the previous contents @@ -234,25 +247,32 @@ static void setup_quasi_va_is_pa(void) */ local_flush_tlb_all(); - /* setup VA is PA, at least up to PAGE_OFFSET */ - - pgtable = (HV_PTE *)current->mm->pgd; + /* + * setup VA is PA, at least up to QUASI_VA_IS_PA_ADDR_RANGE. + * Note here we assume that level-1 page table is defined by + * HPAGE_SIZE. 
+ */ pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - - for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + for (i = 0; i < (QUASI_VA_IS_PA_ADDR_RANGE >> HPAGE_SHIFT); i++) { + unsigned long vaddr = i << HPAGE_SHIFT; + pgd_t *pgd = pgd_offset(current->mm, vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pte_t *ptep = (pte_t *) pmd_offset(pud, vaddr); unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) - __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + __set_pte(ptep, pfn_pte(pfn, pte)); } } -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { void *reboot_code_buffer; - NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long) - ATTRIB_NORET; + pte_t *ptep; + void (*rnk)(unsigned long, void *, unsigned long) + __noreturn; /* Mask all interrupts before starting to reboot. */ interrupt_mask_set_mask(~0ULL); @@ -266,8 +286,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) */ homecache_change_page_home(image->control_code_page, 0, smp_processor_id()); - reboot_code_buffer = vmap(&image->control_code_page, 1, 0, - __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); + reboot_code_buffer = page_address(image->control_code_page); + BUG_ON(reboot_code_buffer == NULL); + ptep = virt_to_pte(NULL, (unsigned long)reboot_code_buffer); + __set_pte(ptep, pte_mkexec(*ptep)); memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); __flush_icache_range( diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S new file mode 100644 index 00000000000..70d7bb0c4d8 --- /dev/null +++ b/arch/tile/kernel/mcount_64.S @@ -0,0 +1,224 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx specific __mcount support + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> + +#define REGSIZE 8 + + .text + .global __mcount + + .macro MCOUNT_SAVE_REGS + addli sp, sp, -REGSIZE + { + st sp, lr + addli r29, sp, - (12 * REGSIZE) + } + { + addli sp, sp, - (13 * REGSIZE) + st r29, sp + } + addli r29, r29, REGSIZE + { st r29, r0; addli r29, r29, REGSIZE } + { st r29, r1; addli r29, r29, REGSIZE } + { st r29, r2; addli r29, r29, REGSIZE } + { st r29, r3; addli r29, r29, REGSIZE } + { st r29, r4; addli r29, r29, REGSIZE } + { st r29, r5; addli r29, r29, REGSIZE } + { st r29, r6; addli r29, r29, REGSIZE } + { st r29, r7; addli r29, r29, REGSIZE } + { st r29, r8; addli r29, r29, REGSIZE } + { st r29, r9; addli r29, r29, REGSIZE } + { st r29, r10; addli r29, r29, REGSIZE } + .endm + + .macro MCOUNT_RESTORE_REGS + addli r29, sp, (2 * REGSIZE) + { ld r0, r29; addli r29, r29, REGSIZE } + { ld r1, r29; addli r29, r29, REGSIZE } + { ld r2, r29; addli r29, r29, REGSIZE } + { ld r3, r29; addli r29, r29, REGSIZE } + { ld r4, r29; addli r29, r29, REGSIZE } + { ld r5, r29; addli r29, r29, REGSIZE } + { ld r6, r29; addli r29, r29, REGSIZE } + { ld r7, r29; addli r29, r29, REGSIZE } + { ld r8, r29; addli r29, r29, REGSIZE } + { ld r9, r29; addli r29, r29, REGSIZE } + { ld r10, r29; addli lr, sp, (13 * REGSIZE) } + { ld lr, lr; addli sp, sp, (14 * REGSIZE) } + .endm + + .macro RETURN_BACK + { move r12, lr; move lr, r10 } + jrp r12 + .endm + +#ifdef CONFIG_DYNAMIC_FTRACE + + .align 64 +STD_ENTRY(__mcount) +__mcount: + j ftrace_stub +STD_ENDPROC(__mcount) + + .align 64 +STD_ENTRY(ftrace_caller) + moveli r11, hw2_last(function_trace_stop) + { shl16insli r11, r11, 
hw1(function_trace_stop); move r12, lr } + { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } + ld r11, r11 + beqz r11, 1f + jrp r12 + +1: + { move r10, lr; move lr, r12 } + MCOUNT_SAVE_REGS + + /* arg1: self return address */ + /* arg2: parent's return address */ + { move r0, lr; move r1, r10 } + + .global ftrace_call +ftrace_call: + /* + * a placeholder for the call to a real tracing function, i.e. + * ftrace_trace_function() + */ + nop + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: + /* + * a placeholder for the call to a real tracing function, i.e. + * ftrace_graph_caller() + */ + nop +#endif + MCOUNT_RESTORE_REGS + .global ftrace_stub +ftrace_stub: + RETURN_BACK +STD_ENDPROC(ftrace_caller) + +#else /* ! CONFIG_DYNAMIC_FTRACE */ + + .align 64 +STD_ENTRY(__mcount) + moveli r11, hw2_last(function_trace_stop) + { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr } + { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 } + ld r11, r11 + beqz r11, 1f + jrp r12 + +1: + { move r10, lr; move lr, r12 } + { + moveli r11, hw2_last(ftrace_trace_function) + moveli r13, hw2_last(ftrace_stub) + } + { + shl16insli r11, r11, hw1(ftrace_trace_function) + shl16insli r13, r13, hw1(ftrace_stub) + } + { + shl16insli r11, r11, hw0(ftrace_trace_function) + shl16insli r13, r13, hw0(ftrace_stub) + } + + ld r11, r11 + sub r14, r13, r11 + bnez r14, static_trace + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + moveli r15, hw2_last(ftrace_graph_return) + shl16insli r15, r15, hw1(ftrace_graph_return) + shl16insli r15, r15, hw0(ftrace_graph_return) + ld r15, r15 + sub r15, r15, r13 + bnez r15, ftrace_graph_caller + + { + moveli r16, hw2_last(ftrace_graph_entry) + moveli r17, hw2_last(ftrace_graph_entry_stub) + } + { + shl16insli r16, r16, hw1(ftrace_graph_entry) + shl16insli r17, r17, hw1(ftrace_graph_entry_stub) + } + { + shl16insli r16, r16, hw0(ftrace_graph_entry) + shl16insli r17, r17, hw0(ftrace_graph_entry_stub) + } + ld 
r16, r16 + sub r17, r16, r17 + bnez r17, ftrace_graph_caller + +#endif + RETURN_BACK + +static_trace: + MCOUNT_SAVE_REGS + + /* arg1: self return address */ + /* arg2: parent's return address */ + { move r0, lr; move r1, r10 } + + /* call ftrace_trace_function() */ + jalr r11 + + MCOUNT_RESTORE_REGS + + .global ftrace_stub +ftrace_stub: + RETURN_BACK +STD_ENDPROC(__mcount) + +#endif /* ! CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +STD_ENTRY(ftrace_graph_caller) +ftrace_graph_caller: +#ifndef CONFIG_DYNAMIC_FTRACE + MCOUNT_SAVE_REGS +#endif + + /* arg1: Get the location of the parent's return address */ + addi r0, sp, 12 * REGSIZE + /* arg2: Get self return address */ + move r1, lr + + jal prepare_ftrace_return + + MCOUNT_RESTORE_REGS + RETURN_BACK +STD_ENDPROC(ftrace_graph_caller) + + .global return_to_handler +return_to_handler: + MCOUNT_SAVE_REGS + + jal ftrace_return_to_handler + /* restore the real parent address */ + move r11, r0 + + MCOUNT_RESTORE_REGS + jr r11 + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 0858ee6b520..7867266f971 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -25,7 +25,7 @@ /* All messages are stored here */ static DEFINE_PER_CPU(HV_MsgState, msg_state); -void __cpuinit init_messaging(void) +void init_messaging(void) { /* Allocate storage for messages in kernel space */ HV_MsgState *state = &__get_cpu_var(msg_state); @@ -68,8 +68,8 @@ void hv_message_intr(struct pt_regs *regs, int intnum) #endif while (1) { - rmi = hv_receive_message(__get_cpu_var(msg_state), - (HV_VirtAddr) message, + HV_MsgState *state = this_cpu_ptr(&msg_state); + rmi = hv_receive_message(*state, (HV_VirtAddr) message, sizeof(message)); if (rmi.msglen == 0) break; diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index e2ab82b7c7e..4918d91bc3a 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -20,18 
+20,9 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/opcode-tile.h> #include <asm/pgtable.h> - -#ifdef __tilegx__ -# define Elf_Rela Elf64_Rela -# define ELF_R_SYM ELF64_R_SYM -# define ELF_R_TYPE ELF64_R_TYPE -#else -# define Elf_Rela Elf32_Rela -# define ELF_R_SYM ELF32_R_SYM -# define ELF_R_TYPE ELF32_R_TYPE -#endif +#include <asm/homecache.h> +#include <arch/opcode.h> #ifdef MODULE_DEBUG #define DEBUGP printk @@ -51,8 +42,6 @@ void *module_alloc(unsigned long size) int i = 0; int npages; - if (size == 0) - return NULL; npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; pages = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); if (pages == NULL) @@ -66,6 +55,8 @@ void *module_alloc(unsigned long size) area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); if (!area) goto error; + area->nr_pages = npages; + area->pages = pages; if (map_vm_area(area, prot_rwx, &pages)) { vunmap(area->addr); @@ -86,31 +77,17 @@ error: void module_free(struct module *mod, void *module_region) { vfree(module_region); + + /* Globally flush the L1 icache. */ + flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + 0, 0, 0, NULL, NULL, 0); + /* - * FIXME: If module_region == mod->init_region, trim exception + * FIXME: If module_region == mod->module_init, trim exception * table entries. */ } -/* We don't need anything special. 
*/ -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ - return 0; -} - -int apply_relocate(Elf_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - pr_err("module %s: .rel relocation unsupported\n", me->name); - return -ENOEXEC; -} - #ifdef __tilegx__ /* * Validate that the high 16 bits of "value" is just the sign-extension of @@ -170,7 +147,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, switch (ELF_R_TYPE(rel[i].r_info)) { -#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) +#ifdef __LITTLE_ENDIAN +# define MUNGE(func) \ + (*location = ((*location & ~func(-1)) | func(value))) +#else +/* + * Instructions are always little-endian, so when we read them as data, + * we have to swap them around before and after modifying them. + */ +# define MUNGE(func) \ + (*location = swab64((swab64(*location) & ~func(-1)) | func(value))) +#endif #ifndef __tilegx__ case R_TILE_32: @@ -243,15 +230,3 @@ int apply_relocate_add(Elf_Shdr *sechdrs, } return 0; } - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - /* FIXME: perhaps remove the "writable" bit from the TLB? */ - return 0; -} - -void module_arch_cleanup(struct module *mod) -{ -} diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 658752b2835..09b58703ac2 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -14,23 +14,31 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/swiotlb.h> #include <linux/vmalloc.h> +#include <linux/export.h> #include <asm/tlbflush.h> #include <asm/homecache.h> /* Generic DMA mapping functions: */ /* - * Allocate what Linux calls "coherent" memory, which for us just - * means uncached. + * Allocate what Linux calls "coherent" memory. On TILEPro this is + * uncached memory; on TILE-Gx it is hash-for-home memory. 
*/ -void *dma_alloc_coherent(struct device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t gfp) +#ifdef __tilepro__ +#define PAGE_HOME_DMA PAGE_HOME_UNCACHED +#else +#define PAGE_HOME_DMA PAGE_HOME_HASH +#endif + +static void *tile_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) { - u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32); - int node = dev_to_node(dev); + u64 dma_mask = (dev && dev->coherent_dma_mask) ? + dev->coherent_dma_mask : DMA_BIT_MASK(32); + int node = dev ? dev_to_node(dev) : 0; int order = get_order(size); struct page *pg; dma_addr_t addr; @@ -38,39 +46,42 @@ void *dma_alloc_coherent(struct device *dev, gfp |= __GFP_ZERO; /* - * By forcing NUMA node 0 for 32-bit masks we ensure that the - * high 32 bits of the resulting PA will be zero. If the mask - * size is, e.g., 24, we may still not be able to guarantee a - * suitable memory address, in which case we will return NULL. - * But such devices are uncommon. + * If the mask specifies that the memory be in the first 4 GB, then + * we force the allocation to come from the DMA zone. We also + * force the node to 0 since that's the only node where the DMA + * zone isn't empty. If the mask size is smaller than 32 bits, we + * may still not be able to guarantee a suitable memory address, in + * which case we will return NULL. But such devices are uncommon. */ - if (dma_mask <= DMA_BIT_MASK(32)) + if (dma_mask <= DMA_BIT_MASK(32)) { + gfp |= GFP_DMA; node = 0; + } - pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_UNCACHED); + pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA); if (pg == NULL) return NULL; addr = page_to_phys(pg); if (addr + size > dma_mask) { - homecache_free_pages(addr, order); + __homecache_free_pages(pg, order); return NULL; } *dma_handle = addr; + return page_address(pg); } -EXPORT_SYMBOL(dma_alloc_coherent); /* - * Free memory that was allocated with dma_alloc_coherent. 
+ * Free memory that was allocated with tile_dma_alloc_coherent. */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) +static void tile_dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { homecache_free_pages((unsigned long)vaddr, get_order(size)); } -EXPORT_SYMBOL(dma_free_coherent); /* * The map routines "map" the specified address range for DMA @@ -86,52 +97,285 @@ EXPORT_SYMBOL(dma_free_coherent); * can count on nothing having been touched. */ -/* Flush a PA range from cache page by page. */ -static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) +/* Set up a single page for DMA access. */ +static void __dma_prep_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + /* + * Flush the page from cache if necessary. + * On tilegx, data is delivered to hash-for-home L3; on tilepro, + * data is delivered direct to memory. + * + * NOTE: If we were just doing DMA_TO_DEVICE we could optimize + * this to be a "flush" not a "finv" and keep some of the + * state in cache across the DMA operation, but it doesn't seem + * worth creating the necessary flush_buffer_xxx() infrastructure. + */ + int home = page_home(page); + switch (home) { + case PAGE_HOME_HASH: +#ifdef __tilegx__ + return; +#endif + break; + case PAGE_HOME_UNCACHED: +#ifdef __tilepro__ + return; +#endif + break; + case PAGE_HOME_IMMUTABLE: + /* Should be going to the device only. */ + BUG_ON(direction == DMA_FROM_DEVICE || + direction == DMA_BIDIRECTIONAL); + return; + case PAGE_HOME_INCOHERENT: + /* Incoherent anyway, so no need to work hard here. */ + return; + default: + BUG_ON(home < 0 || home >= NR_CPUS); + break; + } + homecache_finv_page(page); + +#ifdef DEBUG_ALIGNMENT + /* Warn if the region isn't cacheline aligned. 
*/ + if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1))) + pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n", + PFN_PHYS(page_to_pfn(page)) + offset, size); +#endif +} + +/* Make the page ready to be read by the core. */ +static void __dma_complete_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction direction) +{ +#ifdef __tilegx__ + switch (page_home(page)) { + case PAGE_HOME_HASH: + /* I/O device delivered data the way the cpu wanted it. */ + break; + case PAGE_HOME_INCOHERENT: + /* Incoherent anyway, so no need to work hard here. */ + break; + case PAGE_HOME_IMMUTABLE: + /* Extra read-only copies are not a problem. */ + break; + default: + /* Flush the bogus hash-for-home I/O entries to memory. */ + homecache_finv_map_page(page, PAGE_HOME_HASH); + break; + } +#endif +} + +static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) { struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); - size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); + unsigned long offset = dma_addr & (PAGE_SIZE - 1); + size_t bytes = min(size, (size_t)(PAGE_SIZE - offset)); + + while (size != 0) { + __dma_prep_page(page, offset, bytes, direction); + size -= bytes; + ++page; + offset = 0; + bytes = min((size_t)PAGE_SIZE, size); + } +} - while ((ssize_t)size > 0) { - /* Flush the page. 
*/ - homecache_flush_cache(page++, 0); +static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); + unsigned long offset = dma_addr & (PAGE_SIZE - 1); + size_t bytes = min(size, (size_t)(PAGE_SIZE - offset)); + + while (size != 0) { + __dma_complete_page(page, offset, bytes, direction); + size -= bytes; + ++page; + offset = 0; + bytes = min((size_t)PAGE_SIZE, size); + } +} + +static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + + WARN_ON(nents == 0 || sglist->length == 0); - /* Figure out if we need to continue on the next page. */ - size -= bytesleft; - bytesleft = PAGE_SIZE; + for_each_sg(sglist, sg, nents, i) { + sg->dma_address = sg_phys(sg); + __dma_prep_pa_range(sg->dma_address, sg->length, direction); +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } + + return nents; } -/* - * dma_map_single can be passed any memory address, and there appear - * to be no alignment constraints. - * - * There is a chance that the start of the buffer will share a cache - * line with some other data that has been touched in the meantime. 
- */ -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) +static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { - dma_addr_t dma_addr = __pa(ptr); + struct scatterlist *sg; + int i; BUG_ON(!valid_dma_direction(direction)); - WARN_ON(size == 0); + for_each_sg(sglist, sg, nents, i) { + sg->dma_address = sg_phys(sg); + __dma_complete_pa_range(sg->dma_address, sg->length, + direction); + } +} + +static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + BUG_ON(!valid_dma_direction(direction)); + + BUG_ON(offset + size > PAGE_SIZE); + __dma_prep_page(page, offset, size, direction); + + return page_to_pa(page) + offset; +} + +static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + BUG_ON(!valid_dma_direction(direction)); + + __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), + dma_address & (PAGE_SIZE - 1), size, direction); +} + +static void tile_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, + size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); - __dma_map_pa_range(dma_addr, size); + __dma_complete_pa_range(dma_handle, size, direction); +} - return dma_addr; +static void tile_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + __dma_prep_pa_range(dma_handle, size, direction); } -EXPORT_SYMBOL(dma_map_single); -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) +static void tile_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction 
direction) { + struct scatterlist *sg; + int i; + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nelems, i) { + dma_sync_single_for_cpu(dev, sg->dma_address, + sg_dma_len(sg), direction); + } } -EXPORT_SYMBOL(dma_unmap_single); -int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction) +static void tile_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nelems == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nelems, i) { + dma_sync_single_for_device(dev, sg->dma_address, + sg_dma_len(sg), direction); + } +} + +static inline int +tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static inline int +tile_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +static struct dma_map_ops tile_default_dma_map_ops = { + .alloc = tile_dma_alloc_coherent, + .free = tile_dma_free_coherent, + .map_page = tile_dma_map_page, + .unmap_page = tile_dma_unmap_page, + .map_sg = tile_dma_map_sg, + .unmap_sg = tile_dma_unmap_sg, + .sync_single_for_cpu = tile_dma_sync_single_for_cpu, + .sync_single_for_device = tile_dma_sync_single_for_device, + .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu, + .sync_sg_for_device = tile_dma_sync_sg_for_device, + .mapping_error = tile_dma_mapping_error, + .dma_supported = tile_dma_supported +}; + +struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops; +EXPORT_SYMBOL(tile_dma_map_ops); + +/* Generic PCI DMA mapping functions */ + +static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + int node = dev_to_node(dev); + int order = get_order(size); + struct page *pg; + dma_addr_t addr; + + gfp |= __GFP_ZERO; + + pg = 
homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA); + if (pg == NULL) + return NULL; + + addr = page_to_phys(pg); + + *dma_handle = addr + get_dma_offset(dev); + + return page_address(pg); +} + +/* + * Free memory that was allocated with tile_pci_dma_alloc_coherent. + */ +static void tile_pci_dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + homecache_free_pages((unsigned long)vaddr, get_order(size)); +} + +static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -142,73 +386,103 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, for_each_sg(sglist, sg, nents, i) { sg->dma_address = sg_phys(sg); - __dma_map_pa_range(sg->dma_address, sg->length); + __dma_prep_pa_range(sg->dma_address, sg->length, direction); + + sg->dma_address = sg->dma_address + get_dma_offset(dev); +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return nents; } -EXPORT_SYMBOL(dma_map_sg); -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) +static void tile_pci_dma_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) { + struct scatterlist *sg; + int i; + BUG_ON(!valid_dma_direction(direction)); + for_each_sg(sglist, sg, nents, i) { + sg->dma_address = sg_phys(sg); + __dma_complete_pa_range(sg->dma_address, sg->length, + direction); + } } -EXPORT_SYMBOL(dma_unmap_sg); -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) +static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) { 
BUG_ON(!valid_dma_direction(direction)); BUG_ON(offset + size > PAGE_SIZE); - homecache_flush_cache(page, 0); + __dma_prep_page(page, offset, size, direction); - return page_to_pa(page) + offset; + return page_to_pa(page) + offset + get_dma_offset(dev); } -EXPORT_SYMBOL(dma_map_page); -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) +static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) { BUG_ON(!valid_dma_direction(direction)); + + dma_address -= get_dma_offset(dev); + + __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)), + dma_address & (PAGE_SIZE - 1), size, direction); } -EXPORT_SYMBOL(dma_unmap_page); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) +static void tile_pci_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, + size_t size, + enum dma_data_direction direction) { BUG_ON(!valid_dma_direction(direction)); + + dma_handle -= get_dma_offset(dev); + + __dma_complete_pa_range(dma_handle, size, direction); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) +static void tile_pci_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, + enum dma_data_direction + direction) { - unsigned long start = PFN_DOWN(dma_handle); - unsigned long end = PFN_DOWN(dma_handle + size - 1); - unsigned long i; + dma_handle -= get_dma_offset(dev); - BUG_ON(!valid_dma_direction(direction)); - for (i = start; i <= end; ++i) - homecache_flush_cache(pfn_to_page(i), 0); + __dma_prep_pa_range(dma_handle, size, direction); } -EXPORT_SYMBOL(dma_sync_single_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction 
direction) +static void tile_pci_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sglist, + int nelems, + enum dma_data_direction direction) { + struct scatterlist *sg; + int i; + BUG_ON(!valid_dma_direction(direction)); - WARN_ON(nelems == 0 || sg[0].length == 0); + WARN_ON(nelems == 0 || sglist->length == 0); + + for_each_sg(sglist, sg, nelems, i) { + dma_sync_single_for_cpu(dev, sg->dma_address, + sg_dma_len(sg), direction); + } } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); -/* - * Flush and invalidate cache for scatterlist. - */ -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) +static void tile_pci_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sglist, + int nelems, + enum dma_data_direction direction) { struct scatterlist *sg; int i; @@ -221,31 +495,136 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, sg_dma_len(sg), direction); } } -EXPORT_SYMBOL(dma_sync_sg_for_device); -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) +static inline int +tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +static inline int +tile_pci_dma_supported(struct device *dev, u64 mask) +{ + return 1; +} + +static struct dma_map_ops tile_pci_default_dma_map_ops = { + .alloc = tile_pci_dma_alloc_coherent, + .free = tile_pci_dma_free_coherent, + .map_page = tile_pci_dma_map_page, + .unmap_page = tile_pci_dma_unmap_page, + .map_sg = tile_pci_dma_map_sg, + .unmap_sg = tile_pci_dma_unmap_sg, + .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu, + .sync_single_for_device = tile_pci_dma_sync_single_for_device, + .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu, + .sync_sg_for_device = tile_pci_dma_sync_sg_for_device, + .mapping_error = tile_pci_dma_mapping_error, + .dma_supported = tile_pci_dma_supported +}; + +struct 
dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops; +EXPORT_SYMBOL(gx_pci_dma_map_ops); + +/* PCI DMA mapping functions for legacy PCI devices */ + +#ifdef CONFIG_SWIOTLB +static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) { - dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction); + gfp |= GFP_DMA; + return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); } -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); -void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) +static void tile_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) { - dma_sync_single_for_device(dev, dma_handle + offset, size, direction); + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + +static struct dma_map_ops pci_swiotlb_dma_ops = { + .alloc = tile_swiotlb_alloc_coherent, + .free = tile_swiotlb_free_coherent, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; + +static struct dma_map_ops pci_hybrid_dma_ops = { + .alloc = tile_swiotlb_alloc_coherent, + .free = tile_swiotlb_free_coherent, + .map_page = tile_pci_dma_map_page, + .unmap_page = tile_pci_dma_unmap_page, + .map_sg = tile_pci_dma_map_sg, + .unmap_sg = tile_pci_dma_unmap_sg, + .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu, + .sync_single_for_device = tile_pci_dma_sync_single_for_device, + .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu, + .sync_sg_for_device = 
tile_pci_dma_sync_sg_for_device, + .mapping_error = tile_pci_dma_mapping_error, + .dma_supported = tile_pci_dma_supported +}; + +struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops; +struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops; +#else +struct dma_map_ops *gx_legacy_pci_dma_map_ops; +struct dma_map_ops *gx_hybrid_pci_dma_map_ops; +#endif +EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops); +EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops); + +#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + /* + * For PCI devices with 64-bit DMA addressing capability, promote + * the dma_ops to full capability for both streams and consistent + * memory access. For 32-bit capable devices, limit the consistent + * memory DMA range to max_direct_dma_addr. + */ + if (dma_ops == gx_pci_dma_map_ops || + dma_ops == gx_hybrid_pci_dma_map_ops || + dma_ops == gx_legacy_pci_dma_map_ops) { + if (mask == DMA_BIT_MASK(64)) + set_dma_ops(dev, gx_pci_dma_map_ops); + else if (mask > dev->archdata.max_direct_dma_addr) + mask = dev->archdata.max_direct_dma_addr; + } + + if (!dma_supported(dev, mask)) + return -EIO; + dev->coherent_dma_mask = mask; + return 0; } -EXPORT_SYMBOL(dma_sync_single_range_for_device); +EXPORT_SYMBOL(dma_set_coherent_mask); +#endif +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK /* - * dma_alloc_noncoherent() returns non-cacheable memory, so there's no - * need to do any flushing here. + * The generic dma_get_required_mask() uses the highest physical address + * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or + * 64-bit DMA configuration. Since TILEGx has I/O TLB/MMU, allowing the + * DMAs to use the full 64-bit PCI address space and not limited by + * the physical memory space, we always let the PCI devices use + * 64-bit DMA if they have that capability, by returning the 64-bit + * DMA mask here. 
The device driver has the option to use 32-bit DMA if + * the device is not capable of 64-bit DMA. */ -void dma_cache_sync(void *vaddr, size_t size, - enum dma_data_direction direction) +u64 dma_get_required_mask(struct device *dev) { + return DMA_BIT_MASK(64); } -EXPORT_SYMBOL(dma_cache_sync); +EXPORT_SYMBOL_GPL(dma_get_required_mask); +#endif diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index ea38f0c9ec7..1f80a88c75a 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -1,5 +1,5 @@ /* - * Copyright 2010 Tilera Corporation. All Rights Reserved. + * Copyright 2011 Tilera Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,10 +20,10 @@ #include <linux/capability.h> #include <linux/sched.h> #include <linux/errno.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <linux/io.h> #include <linux/uaccess.h> +#include <linux/export.h> #include <asm/processor.h> #include <asm/sections.h> @@ -51,6 +51,8 @@ * */ +static int pci_probe = 1; + /* * This flag tells if the platform is TILEmpower that needs * special configuration for the PLX switch chip. @@ -59,6 +61,7 @@ int __write_once tile_plx_gen1; static struct pci_controller controllers[TILE_NUM_PCIE]; static int num_controllers; +static int pci_scan_flags[TILE_NUM_PCIE]; static struct pci_ops tile_cfg_ops; @@ -79,7 +82,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * controller_id is the controller number, config type is 0 or 1 for * config0 or config1 operations. */ -static int __init tile_pcie_open(int controller_id, int config_type) +static int tile_pcie_open(int controller_id, int config_type) { char filename[32]; int fd; @@ -95,8 +98,7 @@ static int __init tile_pcie_open(int controller_id, int config_type) /* * Get the IRQ numbers from the HV and set up the handlers for them. 
*/ -static int __init tile_init_irqs(int controller_id, - struct pci_controller *controller) +static int tile_init_irqs(int controller_id, struct pci_controller *controller) { char filename[32]; int fd; @@ -143,67 +145,74 @@ int __init tile_pci_init(void) { int i; + if (!pci_probe) { + pr_info("PCI: disabled by boot argument\n"); + return 0; + } + pr_info("PCI: Searching for controllers...\n"); + /* Re-init number of PCIe controllers to support hot-plug feature. */ + num_controllers = 0; + /* Do any configuration we need before using the PCIe */ for (i = 0; i < TILE_NUM_PCIE; i++) { - int hv_cfg_fd0 = -1; - int hv_cfg_fd1 = -1; - int hv_mem_fd = -1; - char name[32]; - struct pci_controller *controller; - /* - * Open the fd to the HV. If it fails then this - * device doesn't exist. + * To see whether we need a real config op based on + * the results of pcibios_init(), to support PCIe hot-plug. */ - hv_cfg_fd0 = tile_pcie_open(i, 0); - if (hv_cfg_fd0 < 0) - continue; - hv_cfg_fd1 = tile_pcie_open(i, 1); - if (hv_cfg_fd1 < 0) { - pr_err("PCI: Couldn't open config fd to HV " - "for controller %d\n", i); - goto err_cont; - } - - sprintf(name, "pcie/%d/mem", i); - hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); - if (hv_mem_fd < 0) { - pr_err("PCI: Could not open mem fd to HV!\n"); - goto err_cont; - } + if (pci_scan_flags[i] == 0) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. 
+ */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } - pr_info("PCI: Found PCI controller #%d\n", i); + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } - controller = &controllers[num_controllers]; + pr_info("PCI: Found PCI controller #%d\n", i); - if (tile_init_irqs(i, controller)) { - pr_err("PCI: Could not initialize " - "IRQs, aborting.\n"); - goto err_cont; - } + controller = &controllers[i]; - controller->index = num_controllers; - controller->hv_cfg_fd[0] = hv_cfg_fd0; - controller->hv_cfg_fd[1] = hv_cfg_fd1; - controller->hv_mem_fd = hv_mem_fd; - controller->first_busno = 0; - controller->last_busno = 0xff; - controller->ops = &tile_cfg_ops; + controller->index = i; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; - num_controllers++; - continue; + num_controllers++; + continue; err_cont: - if (hv_cfg_fd0 >= 0) - hv_dev_close(hv_cfg_fd0); - if (hv_cfg_fd1 >= 0) - hv_dev_close(hv_cfg_fd1); - if (hv_mem_fd >= 0) - hv_dev_close(hv_mem_fd); - continue; + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } } /* @@ -224,7 +233,7 @@ err_cont: * (pin - 1) converts from the PCI standard's [1:4] convention to * a normal [0:3] range. 
*/ -static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +static int tile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct pci_controller *controller = (struct pci_controller *)dev->sysdata; @@ -232,7 +241,7 @@ static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) } -static void __init fixup_read_and_payload_sizes(void) +static void fixup_read_and_payload_sizes(void) { struct pci_dev *dev = NULL; int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ @@ -240,39 +249,20 @@ static void __init fixup_read_and_payload_sizes(void) u16 new_values; /* Scan for the smallest maximum payload size. */ - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int pcie_caps_offset; - u32 devcap; - int max_payload; - - pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pcie_caps_offset == 0) + for_each_pci_dev(dev) { + if (!pci_is_pcie(dev)) continue; - pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, - &devcap); - max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; - if (max_payload < smallest_max_payload) - smallest_max_payload = max_payload; + if (dev->pcie_mpss < smallest_max_payload) + smallest_max_payload = dev->pcie_mpss; } /* Now, set the max_payload_size for all devices to that value. 
*/ new_values = (max_read_size << 12) | (smallest_max_payload << 5); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int pcie_caps_offset; - u16 devctl; - - pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (pcie_caps_offset == 0) - continue; - - pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, - &devctl); - devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); - devctl |= new_values; - pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, - devctl); - } + for_each_pci_dev(dev) + pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ, + new_values); } @@ -282,7 +272,7 @@ static void __init fixup_read_and_payload_sizes(void) * The controllers have been set up by the time we get here, by a call to * tile_pci_init. */ -static int __init pcibios_init(void) +int __init pcibios_init(void) { int i; @@ -293,28 +283,34 @@ static int __init pcibios_init(void) * known to require at least 20ms here, but we use a more * conservative value. */ - mdelay(250); + msleep(250); /* Scan all of the recorded PCI controllers. */ - for (i = 0; i < num_controllers; i++) { - struct pci_controller *controller = &controllers[i]; - struct pci_bus *bus; - - pr_info("PCI: initializing controller #%d\n", i); - + for (i = 0; i < TILE_NUM_PCIE; i++) { /* - * This comes from the generic Linux PCI driver. - * - * It reads the PCI tree for this bus into the Linux - * data structures. - * - * This is inlined in linux/pci.h and calls into - * pci_scan_bus_parented() in probe.c. + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. 
*/ - bus = pci_scan_bus(0, controller->ops, controller); - controller->root_bus = bus; - controller->last_busno = bus->subordinate; + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + LIST_HEAD(resources); + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize IRQs\n"); + continue; + } + pr_info("PCI: initializing controller #%d\n", i); + + pci_add_resource(&resources, &ioport_resource); + pci_add_resource(&resources, &iomem_resource); + bus = pci_scan_root_bus(NULL, 0, controller->ops, + controller, &resources); + controller->root_bus = bus; + controller->last_busno = bus->busn_res.end; + } } /* Do machine dependent PCI interrupt routing */ @@ -326,34 +322,45 @@ static int __init pcibios_init(void) * It allocates all of the resources (I/O memory, etc) * associated with the devices read in above. */ - pci_assign_unassigned_resources(); /* Configure the max_read_size and max_payload_size values. */ fixup_read_and_payload_sizes(); /* Record the I/O resources in the PCI controller structure. */ - for (i = 0; i < num_controllers; i++) { - struct pci_bus *root_bus = controllers[i].root_bus; - struct pci_bus *next_bus; - struct pci_dev *dev; - - list_for_each_entry(dev, &root_bus->devices, bus_list) { - /* Find the PCI host controller, ie. the 1st bridge. */ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && - (PCI_SLOT(dev->devfn) == 0)) { - next_bus = dev->subordinate; - controllers[i].mem_resources[0] = - *next_bus->resource[0]; - controllers[i].mem_resources[1] = - *next_bus->resource[1]; - controllers[i].mem_resources[2] = - *next_bus->resource[2]; - - break; + for (i = 0; i < TILE_NUM_PCIE; i++) { + /* + * Do real pcibios init ops if the controller is initialized + * by tile_pci_init() successfully and not initialized by + * pcibios_init() yet to support PCIe hot-plug. 
+ */ + if (pci_scan_flags[i] == 0 && controllers[i].ops != NULL) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* + * Find the PCI host controller, ie. the 1st + * bridge. + */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + /* Setup flags. */ + pci_scan_flags[i] = 1; + + break; + } } } - } return 0; @@ -363,27 +370,24 @@ subsys_initcall(pcibios_init); /* * No bus fixups needed. */ -void __devinit pcibios_fixup_bus(struct pci_bus *bus) +void pcibios_fixup_bus(struct pci_bus *bus) { /* Nothing needs to be done. */ } -/* - * This can be called from the generic PCI layer, but doesn't need to - * do anything. - */ -char __devinit *pcibios_setup(char *str) +void pcibios_set_master(struct pci_dev *dev) { - /* Nothing needs to be done. */ - return str; + /* No special bus mastering setup handling. */ } -/* - * This is called from the generic Linux layer. - */ -void __init pcibios_update_irq(struct pci_dev *dev, int irq) +/* Process any "pci=" kernel boot arguments. 
*/ +char *__init pcibios_setup(char *str) { - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } + return str; } /* @@ -439,27 +443,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) -{ - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len) - return NULL; - if (max && len > max) - len = max; - - if (!(flags & IORESOURCE_MEM)) { - pr_info("PCI: Trying to map invalid resource %#lx\n", flags); - start = 0; - } - - return (void __iomem *)start; -} -EXPORT_SYMBOL(pci_iomap); - - /**************************************************************** * * Tile PCI config space read/write routines @@ -476,11 +459,8 @@ EXPORT_SYMBOL(pci_iomap); * specified bus & slot. */ -static int __devinit tile_cfg_read(struct pci_bus *bus, - unsigned int devfn, - int offset, - int size, - u32 *val) +static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset, + int size, u32 *val) { struct pci_controller *controller = bus->sysdata; int busnum = bus->number & 0xff; @@ -522,11 +502,8 @@ static int __devinit tile_cfg_read(struct pci_bus *bus, * See tile_cfg_read() for relevant comments. * Note that "val" is the value to write, not a pointer to that value. */ -static int __devinit tile_cfg_write(struct pci_bus *bus, - unsigned int devfn, - int offset, - int size, - u32 val) +static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset, + int size, u32 val) { struct pci_controller *controller = bus->sysdata; int busnum = bus->number & 0xff; diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c new file mode 100644 index 00000000000..e39f9c54280 --- /dev/null +++ b/arch/tile/kernel/pci_gx.c @@ -0,0 +1,1610 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/mmzone.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/irq.h> +#include <linux/msi.h> +#include <linux/io.h> +#include <linux/uaccess.h> +#include <linux/ctype.h> + +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/byteorder.h> + +#include <gxio/iorpc_globals.h> +#include <gxio/kiorpc.h> +#include <gxio/trio.h> +#include <gxio/iorpc_trio.h> +#include <hv/drv_trio_intf.h> + +#include <arch/sim.h> + +/* + * This file contains the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + */ + +#define DEBUG_PCI_CFG 0 + +#if DEBUG_PCI_CFG +#define TRACE_CFG_WR(size, val, bus, dev, func, offset) \ + pr_info("CFG WR %d-byte VAL %#x to bus %d dev %d func %d addr %u\n", \ + size, val, bus, dev, func, offset & 0xFFF); +#define TRACE_CFG_RD(size, val, bus, dev, func, offset) \ + pr_info("CFG RD %d-byte VAL %#x from bus %d dev %d func %d addr %u\n", \ + size, val, bus, dev, func, offset & 0xFFF); +#else +#define TRACE_CFG_WR(...) +#define TRACE_CFG_RD(...) +#endif + +static int pci_probe = 1; + +/* Information on the PCIe RC ports configuration. 
*/ +static int pcie_rc[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; + +/* + * On some platforms with one or more Gx endpoint ports, we need to + * delay the PCIe RC port probe for a few seconds to work around + * a HW PCIe link-training bug. The exact delay is specified with + * a kernel boot argument in the form of "pcie_rc_delay=T,P,S", + * where T is the TRIO instance number, P is the port number and S is + * the delay in seconds. If the argument is specified, but the delay is + * not provided, the value will be DEFAULT_RC_DELAY. + */ +static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES]; + +/* Default number of seconds that the PCIe RC port probe can be delayed. */ +#define DEFAULT_RC_DELAY 10 + +/* The PCI I/O space size in each PCI domain. */ +#define IO_SPACE_SIZE 0x10000 + +/* Provide shorter versions of some very long constant names. */ +#define AUTO_CONFIG_RC \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC +#define AUTO_CONFIG_RC_G1 \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1 +#define AUTO_CONFIG_EP \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT +#define AUTO_CONFIG_EP_G1 \ + TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1 + +/* Array of the PCIe ports configuration info obtained from the BIB. */ +struct pcie_trio_ports_property pcie_ports[TILEGX_NUM_TRIO]; + +/* Number of configured TRIO instances. */ +int num_trio_shims; + +/* All drivers share the TRIO contexts defined here. */ +gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO]; + +/* Array of PCIe RC controllers, and the number of RC ports discovered. */ +struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES]; +int num_rc_controllers; + +static struct pci_ops tile_cfg_ops; + +/* Mask of CPUs that should receive PCIe interrupts. */ +static struct cpumask intr_cpus_map; + +/* We don't need to worry about the alignment of resources. 
*/ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, + resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #. + * For now, we simply send interrupts to non-dataplane CPUs. + * We may implement methods to allow user to specify the target CPUs, + * e.g. via boot arguments. + */ +static int tile_irq_cpu(int irq) +{ + unsigned int count; + int i = 0; + int cpu; + + count = cpumask_weight(&intr_cpus_map); + if (unlikely(count == 0)) { + pr_warning("intr_cpus_map empty, interrupts will be" + " delievered to dataplane tiles\n"); + return irq % (smp_height * smp_width); + } + + count = irq % count; + for_each_cpu(cpu, &intr_cpus_map) { + if (i++ == count) + break; + } + return cpu; +} + +/* Open a file descriptor to the TRIO shim. */ +static int tile_pcie_open(int trio_index) +{ + gxio_trio_context_t *context = &trio_contexts[trio_index]; + int ret; + int mac; + + /* This opens a file descriptor to the TRIO shim. */ + ret = gxio_trio_init(context, trio_index); + if (ret < 0) + goto gxio_trio_init_failure; + + /* Allocate an ASID for the kernel. */ + ret = gxio_trio_alloc_asids(context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: ASID alloc failure on TRIO %d, give up\n", + trio_index); + goto asid_alloc_failure; + } + + context->asid = ret; + +#ifdef USE_SHARED_PCIE_CONFIG_REGION + /* + * Alloc a PIO region for config access, shared by all MACs per TRIO. + * This shouldn't fail since the kernel is supposed to be the first + * client of the TRIO's PIO regions. + */ + ret = gxio_trio_alloc_pio_regions(context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: CFG PIO alloc failure on TRIO %d, give up\n", + trio_index); + goto pio_alloc_failure; + } + + context->pio_cfg_index = ret; + + /* + * For PIO CFG, the bus_address_hi parameter is 0. 
The mac parameter + * is also 0 because it is specified in PIO_REGION_SETUP_CFG_ADDR. + */ + ret = gxio_trio_init_pio_region_aux(context, context->pio_cfg_index, + 0, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE); + if (ret < 0) { + pr_err("PCI: CFG PIO init failure on TRIO %d, give up\n", + trio_index); + goto pio_alloc_failure; + } +#endif + + /* Get the properties of the PCIe ports on this TRIO instance. */ + ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]); + if (ret < 0) { + pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d," + " on TRIO %d\n", ret, trio_index); + goto get_port_property_failure; + } + + context->mmio_base_mac = + iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE); + if (context->mmio_base_mac == NULL) { + pr_err("PCI: TRIO config space mapping failure, error %d," + " on TRIO %d\n", ret, trio_index); + ret = -ENOMEM; + + goto trio_mmio_mapping_failure; + } + + /* Check the port strap state which will override the BIB setting. */ + for (mac = 0; mac < TILEGX_TRIO_PCIES; mac++) { + TRIO_PCIE_INTFC_PORT_CONFIG_t port_config; + unsigned int reg_offset; + + /* Ignore ports that are not specified in the BIB. */ + if (!pcie_ports[trio_index].ports[mac].allow_rc && + !pcie_ports[trio_index].ports[mac].allow_ep) + continue; + + reg_offset = + (TRIO_PCIE_INTFC_PORT_CONFIG << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + port_config.word = + __gxio_mmio_read(context->mmio_base_mac + reg_offset); + + if (port_config.strap_state != AUTO_CONFIG_RC && + port_config.strap_state != AUTO_CONFIG_RC_G1) { + /* + * If this is really intended to be an EP port, record + * it so that the endpoint driver will know about it. 
+ */ + if (port_config.strap_state == AUTO_CONFIG_EP || + port_config.strap_state == AUTO_CONFIG_EP_G1) + pcie_ports[trio_index].ports[mac].allow_ep = 1; + } + } + + return ret; + +trio_mmio_mapping_failure: +get_port_property_failure: +asid_alloc_failure: +#ifdef USE_SHARED_PCIE_CONFIG_REGION +pio_alloc_failure: +#endif + hv_dev_close(context->fd); +gxio_trio_init_failure: + context->fd = -1; + + return ret; +} + +static int __init tile_trio_init(void) +{ + int i; + + /* We loop over all the TRIO shims. */ + for (i = 0; i < TILEGX_NUM_TRIO; i++) { + if (tile_pcie_open(i) < 0) + continue; + num_trio_shims++; + } + + return 0; +} +postcore_initcall(tile_trio_init); + +static void tilegx_legacy_irq_ack(struct irq_data *d) +{ + __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq); +} + +static void tilegx_legacy_irq_mask(struct irq_data *d) +{ + __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq); +} + +static void tilegx_legacy_irq_unmask(struct irq_data *d) +{ + __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq); +} + +static struct irq_chip tilegx_legacy_irq_chip = { + .name = "tilegx_legacy_irq", + .irq_ack = tilegx_legacy_irq_ack, + .irq_mask = tilegx_legacy_irq_mask, + .irq_unmask = tilegx_legacy_irq_unmask, + + /* TBD: support set_affinity. */ +}; + +/* + * This is a wrapper function of the kernel level-trigger interrupt + * handler handle_level_irq() for PCI legacy interrupts. The TRIO + * is configured such that only INTx Assert interrupts are proxied + * to Linux which just calls handle_level_irq() after clearing the + * MAC INTx Assert status bit associated with this interrupt. 
+ */ +static void trio_handle_level_irq(unsigned int irq, struct irq_desc *desc) +{ + struct pci_controller *controller = irq_desc_get_handler_data(desc); + gxio_trio_context_t *trio_context = controller->trio; + uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc); + int mac = controller->mac; + unsigned int reg_offset; + uint64_t level_mask; + + handle_level_irq(irq, desc); + + /* + * Clear the INTx Level status, otherwise future interrupts are + * not sent. + */ + reg_offset = (TRIO_PCIE_INTFC_MAC_INT_STS << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + level_mask = TRIO_PCIE_INTFC_MAC_INT_STS__INT_LEVEL_MASK << intx; + + __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset, level_mask); +} + +/* + * Create kernel irqs and set up the handlers for the legacy interrupts. + * Also some minimum initialization for the MSI support. + */ +static int tile_init_irqs(struct pci_controller *controller) +{ + int i; + int j; + int irq; + int result; + + cpumask_copy(&intr_cpus_map, cpu_online_mask); + + + for (i = 0; i < 4; i++) { + gxio_trio_context_t *context = controller->trio; + int cpu; + + /* Ask the kernel to allocate an IRQ. */ + irq = irq_alloc_hwirq(-1); + if (!irq) { + pr_err("PCI: no free irq vectors, failed for %d\n", i); + goto free_irqs; + } + controller->irq_intx_table[i] = irq; + + /* Distribute the 4 IRQs to different tiles. */ + cpu = tile_irq_cpu(irq); + + /* Configure the TRIO intr binding for this IRQ. */ + result = gxio_trio_config_legacy_intr(context, cpu_x(cpu), + cpu_y(cpu), KERNEL_PL, + irq, controller->mac, i); + if (result < 0) { + pr_err("PCI: MAC intx config failed for %d\n", i); + + goto free_irqs; + } + + /* Register the IRQ handler with the kernel. 
*/ + irq_set_chip_and_handler(irq, &tilegx_legacy_irq_chip, + trio_handle_level_irq); + irq_set_chip_data(irq, (void *)(uint64_t)i); + irq_set_handler_data(irq, controller); + } + + return 0; + +free_irqs: + for (j = 0; j < i; j++) + irq_free_hwirq(controller->irq_intx_table[j]); + + return -1; +} + +/* + * Return 1 if the port is strapped to operate in RC mode. + */ +static int +strapped_for_rc(gxio_trio_context_t *trio_context, int mac) +{ + TRIO_PCIE_INTFC_PORT_CONFIG_t port_config; + unsigned int reg_offset; + + /* Check the port configuration. */ + reg_offset = + (TRIO_PCIE_INTFC_PORT_CONFIG << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + port_config.word = + __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset); + + if (port_config.strap_state == AUTO_CONFIG_RC || + port_config.strap_state == AUTO_CONFIG_RC_G1) + return 1; + else + return 0; +} + +/* + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Return the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int ctl_index = 0; + int i, j; + + if (!pci_probe) { + pr_info("PCI: disabled by boot argument\n"); + return 0; + } + + pr_info("PCI: Searching for controllers...\n"); + + if (num_trio_shims == 0 || sim_is_simulator()) + return 0; + + /* + * Now determine which PCIe ports are configured to operate in RC + * mode. There is a difference in the port configuration capability + * between the Gx36 and Gx72 devices. + * + * The Gx36 has configuration capability for each of the 3 PCIe + * interfaces (disable, auto endpoint, auto RC, etc.). + * On the Gx72, you can only select one of the 3 PCIe interfaces per + * TRIO to train automatically. Further, the allowable training modes + * are reduced to four options (auto endpoint, auto RC, stream x1, + * stream x4). 
+ * + * For Gx36 ports, it must be allowed to be in RC mode by the + * Board Information Block, and the hardware strapping pins must be + * set to RC mode. + * + * For Gx72 ports, the port will operate in RC mode if either of the + * following is true: + * 1. It is allowed to be in RC mode by the Board Information Block, + * and the BIB doesn't allow the EP mode. + * 2. It is allowed to be in either the RC or the EP mode by the BIB, + * and the hardware strapping pin is set to RC mode. + */ + for (i = 0; i < TILEGX_NUM_TRIO; i++) { + gxio_trio_context_t *context = &trio_contexts[i]; + + if (context->fd < 0) + continue; + + for (j = 0; j < TILEGX_TRIO_PCIES; j++) { + int is_rc = 0; + + if (pcie_ports[i].is_gx72 && + pcie_ports[i].ports[j].allow_rc) { + if (!pcie_ports[i].ports[j].allow_ep || + strapped_for_rc(context, j)) + is_rc = 1; + } else if (pcie_ports[i].ports[j].allow_rc && + strapped_for_rc(context, j)) { + is_rc = 1; + } + if (is_rc) { + pcie_rc[i][j] = 1; + num_rc_controllers++; + } + } + } + + /* Return if no PCIe ports are configured to operate in RC mode. */ + if (num_rc_controllers == 0) + return 0; + + /* Set the TRIO pointer and MAC index for each PCIe RC port. */ + for (i = 0; i < TILEGX_NUM_TRIO; i++) { + for (j = 0; j < TILEGX_TRIO_PCIES; j++) { + if (pcie_rc[i][j]) { + pci_controllers[ctl_index].trio = + &trio_contexts[i]; + pci_controllers[ctl_index].mac = j; + pci_controllers[ctl_index].trio_index = i; + ctl_index++; + if (ctl_index == num_rc_controllers) + goto out; + } + } + } + +out: + /* Configure each PCIe RC port. */ + for (i = 0; i < num_rc_controllers; i++) { + + /* Configure the PCIe MAC to run in RC mode. 
*/ + struct pci_controller *controller = &pci_controllers[i]; + + controller->index = i; + controller->ops = &tile_cfg_ops; + + controller->io_space.start = PCIBIOS_MIN_IO + + (i * IO_SPACE_SIZE); + controller->io_space.end = controller->io_space.start + + IO_SPACE_SIZE - 1; + BUG_ON(controller->io_space.end > IO_SPACE_LIMIT); + controller->io_space.flags = IORESOURCE_IO; + snprintf(controller->io_space_name, + sizeof(controller->io_space_name), + "PCI I/O domain %d", i); + controller->io_space.name = controller->io_space_name; + + /* + * The PCI memory resource is located above the PA space. + * For every host bridge, the BAR window or the MMIO aperture + * is in range [3GB, 4GB - 1] of a 4GB space beyond the + * PA space. + */ + controller->mem_offset = TILE_PCI_MEM_START + + (i * TILE_PCI_BAR_WINDOW_TOP); + controller->mem_space.start = controller->mem_offset + + TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE; + controller->mem_space.end = controller->mem_offset + + TILE_PCI_BAR_WINDOW_TOP - 1; + controller->mem_space.flags = IORESOURCE_MEM; + snprintf(controller->mem_space_name, + sizeof(controller->mem_space_name), + "PCI mem domain %d", i); + controller->mem_space.name = controller->mem_space_name; + } + + return num_rc_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(const struct pci_dev *dev, u8 device, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return controller->irq_intx_table[pin - 1]; +} + +static void fixup_read_and_payload_sizes(struct pci_controller *controller) +{ + gxio_trio_context_t *trio_context = controller->trio; + struct pci_bus *root_bus = controller->root_bus; + TRIO_PCIE_RC_DEVICE_CONTROL_t dev_control; + TRIO_PCIE_RC_DEVICE_CAP_t rc_dev_cap; + unsigned int reg_offset; + struct pci_bus *child; + int mac; + int err; + + mac = controller->mac; + + /* Set our max read request size to be 4KB. 
*/ + reg_offset = + (TRIO_PCIE_RC_DEVICE_CONTROL << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac + + reg_offset); + dev_control.max_read_req_sz = 5; + __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, + dev_control.word); + + /* + * Set the max payload size supported by this Gx PCIe MAC. + * Though Gx PCIe supports Max Payload Size of up to 1024 bytes, + * experiments have shown that setting MPS to 256 yields the + * best performance. + */ + reg_offset = + (TRIO_PCIE_RC_DEVICE_CAP << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + rc_dev_cap.word = __gxio_mmio_read32(trio_context->mmio_base_mac + + reg_offset); + rc_dev_cap.mps_sup = 1; + __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, + rc_dev_cap.word); + + /* Configure PCI Express MPS setting. */ + list_for_each_entry(child, &root_bus->children, node) + pcie_bus_configure_settings(child); + + /* + * Set the mac_config register in trio based on the MPS/MRS of the link. 
+ */ + reg_offset = + (TRIO_PCIE_RC_DEVICE_CONTROL << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac + + reg_offset); + + err = gxio_trio_set_mps_mrs(trio_context, + dev_control.max_payload_size, + dev_control.max_read_req_sz, + mac); + if (err < 0) { + pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, " + "MAC %d on TRIO %d\n", + mac, controller->trio_index); + } +} + +static int setup_pcie_rc_delay(char *str) +{ + unsigned long delay = 0; + unsigned long trio_index; + unsigned long mac; + + if (str == NULL || !isdigit(*str)) + return -EINVAL; + trio_index = simple_strtoul(str, (char **)&str, 10); + if (trio_index >= TILEGX_NUM_TRIO) + return -EINVAL; + + if (*str != ',') + return -EINVAL; + + str++; + if (!isdigit(*str)) + return -EINVAL; + mac = simple_strtoul(str, (char **)&str, 10); + if (mac >= TILEGX_TRIO_PCIES) + return -EINVAL; + + if (*str != '\0') { + if (*str != ',') + return -EINVAL; + + str++; + if (!isdigit(*str)) + return -EINVAL; + delay = simple_strtoul(str, (char **)&str, 10); + } + + rc_delay[trio_index][mac] = delay ? : DEFAULT_RC_DELAY; + return 0; +} +early_param("pcie_rc_delay", setup_pcie_rc_delay); + +/* PCI initialization entry point, called by subsys_initcall. */ +int __init pcibios_init(void) +{ + resource_size_t offset; + LIST_HEAD(resources); + int next_busno; + int i; + + tile_pci_init(); + + if (num_rc_controllers == 0) + return 0; + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + msleep(250); + + /* Scan all of the recorded PCI controllers. 
*/ + for (next_busno = 0, i = 0; i < num_rc_controllers; i++) { + struct pci_controller *controller = &pci_controllers[i]; + gxio_trio_context_t *trio_context = controller->trio; + TRIO_PCIE_INTFC_PORT_STATUS_t port_status; + TRIO_PCIE_INTFC_TX_FIFO_CTL_t tx_fifo_ctl; + struct pci_bus *bus; + unsigned int reg_offset; + unsigned int class_code_revision; + int trio_index; + int mac; + int ret; + + if (trio_context->fd < 0) + continue; + + trio_index = controller->trio_index; + mac = controller->mac; + + /* + * Check for PCIe link-up status to decide if we need + * to force the link to come up. + */ + reg_offset = + (TRIO_PCIE_INTFC_PORT_STATUS << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + port_status.word = + __gxio_mmio_read(trio_context->mmio_base_mac + + reg_offset); + if (!port_status.dl_up) { + if (rc_delay[trio_index][mac]) { + pr_info("Delaying PCIe RC TRIO init %d sec" + " on MAC %d on TRIO %d\n", + rc_delay[trio_index][mac], mac, + trio_index); + msleep(rc_delay[trio_index][mac] * 1000); + } + ret = gxio_trio_force_rc_link_up(trio_context, mac); + if (ret < 0) + pr_err("PCI: PCIE_FORCE_LINK_UP failure, " + "MAC %d on TRIO %d\n", mac, trio_index); + } + + pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i, + trio_index, controller->mac); + + /* Delay the bus probe if needed. */ + if (rc_delay[trio_index][mac]) { + pr_info("Delaying PCIe RC bus enumerating %d sec" + " on MAC %d on TRIO %d\n", + rc_delay[trio_index][mac], mac, + trio_index); + msleep(rc_delay[trio_index][mac] * 1000); + } else { + /* + * Wait a bit here because some EP devices + * take longer to come up. + */ + msleep(1000); + } + + /* Check for PCIe link-up status again. 
*/ + port_status.word = + __gxio_mmio_read(trio_context->mmio_base_mac + + reg_offset); + if (!port_status.dl_up) { + if (pcie_ports[trio_index].ports[mac].removable) { + pr_info("PCI: link is down, MAC %d on TRIO %d\n", + mac, trio_index); + pr_info("This is expected if no PCIe card" + " is connected to this link\n"); + } else + pr_err("PCI: link is down, MAC %d on TRIO %d\n", + mac, trio_index); + continue; + } + + /* + * Ensure that the link can come out of L1 power down state. + * Strictly speaking, this is needed only in the case of + * heavy RC-initiated DMAs. + */ + reg_offset = + (TRIO_PCIE_INTFC_TX_FIFO_CTL << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + tx_fifo_ctl.word = + __gxio_mmio_read(trio_context->mmio_base_mac + + reg_offset); + tx_fifo_ctl.min_p_credits = 0; + __gxio_mmio_write(trio_context->mmio_base_mac + reg_offset, + tx_fifo_ctl.word); + + /* + * Change the device ID so that Linux bus crawl doesn't confuse + * the internal bridge with any Tilera endpoints. + */ + reg_offset = + (TRIO_PCIE_RC_DEVICE_ID_VEN_ID << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + __gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset, + (TILERA_GX36_RC_DEV_ID << + TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT) | + TILERA_VENDOR_ID); + + /* Set the internal P2P bridge class code. 
*/ + reg_offset = + (TRIO_PCIE_RC_REVISION_ID << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_STANDARD << + TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + class_code_revision = + __gxio_mmio_read32(trio_context->mmio_base_mac + + reg_offset); + class_code_revision = (class_code_revision & 0xff) | + (PCI_CLASS_BRIDGE_PCI << 16); + + __gxio_mmio_write32(trio_context->mmio_base_mac + + reg_offset, class_code_revision); + +#ifdef USE_SHARED_PCIE_CONFIG_REGION + + /* Map in the MMIO space for the PIO region. */ + offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index) | + (((unsigned long long)mac) << + TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT); + +#else + + /* Alloc a PIO region for PCI config access per MAC. */ + ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: PCI CFG PIO alloc failure for mac %d " + "on TRIO %d, give up\n", mac, trio_index); + + continue; + } + + trio_context->pio_cfg_index[mac] = ret; + + /* For PIO CFG, the bus_address_hi parameter is 0. */ + ret = gxio_trio_init_pio_region_aux(trio_context, + trio_context->pio_cfg_index[mac], + mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE); + if (ret < 0) { + pr_err("PCI: PCI CFG PIO init failure for mac %d " + "on TRIO %d, give up\n", mac, trio_index); + + continue; + } + + offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index[mac]) | + (((unsigned long long)mac) << + TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT); + +#endif + + /* + * To save VMALLOC space, we take advantage of the fact that + * bit 29 in the PIO CFG address format is reserved 0. With + * TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT being 30, + * this cuts VMALLOC space usage from 1GB to 512MB per mac. 
+ */ + trio_context->mmio_base_pio_cfg[mac] = + iorpc_ioremap(trio_context->fd, offset, (1UL << + (TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1))); + if (trio_context->mmio_base_pio_cfg[mac] == NULL) { + pr_err("PCI: PIO map failure for mac %d on TRIO %d\n", + mac, trio_index); + + continue; + } + + /* Initialize the PCIe interrupts. */ + if (tile_init_irqs(controller)) { + pr_err("PCI: IRQs init failure for mac %d on TRIO %d\n", + mac, trio_index); + + continue; + } + + /* + * The PCI memory resource is located above the PA space. + * The memory range for the PCI root bus should not overlap + * with the physical RAM. + * + * NOTE(review): the pci_scan_root_bus() result is dereferenced + * below (bus->busn_res.end) without a NULL check; presumably it + * can return NULL on failure -- confirm and guard if so. + */ + pci_add_resource_offset(&resources, &controller->mem_space, + controller->mem_offset); + pci_add_resource(&resources, &controller->io_space); + controller->first_busno = next_busno; + bus = pci_scan_root_bus(NULL, next_busno, controller->ops, + controller, &resources); + controller->root_bus = bus; + next_busno = bus->busn_res.end + 1; + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + pci_assign_unassigned_resources(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < num_rc_controllers; i++) { + struct pci_controller *controller = &pci_controllers[i]; + gxio_trio_context_t *trio_context = controller->trio; + struct pci_bus *root_bus = pci_controllers[i].root_bus; + int ret; + int j; + + /* + * Skip controllers that are not properly initialized or + * have down links. + */ + if (root_bus == NULL) + continue; + + /* Configure the max_payload_size values for this domain. */ + fixup_read_and_payload_sizes(controller); + + /* Alloc a PIO region for PCI memory access for each RC port. 
*/ + ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } + + controller->pio_mem_index = ret; + + /* + * For PIO MEM, the bus_address_hi parameter is hard-coded 0 + * because we always assign 32-bit PCI bus BAR ranges. + */ + ret = gxio_trio_init_pio_region_aux(trio_context, + controller->pio_mem_index, + controller->mac, + 0, + 0); + if (ret < 0) { + pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } + +#ifdef CONFIG_TILE_PCI_IO + /* + * Alloc a PIO region for PCI I/O space access for each RC port. + */ + ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0); + if (ret < 0) { + pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } + + controller->pio_io_index = ret; + + /* + * For PIO IO, the bus_address_hi parameter is hard-coded 0 + * because PCI I/O address space is 32-bit. + */ + ret = gxio_trio_init_pio_region_aux(trio_context, + controller->pio_io_index, + controller->mac, + 0, + HV_TRIO_PIO_FLAG_IO_SPACE); + if (ret < 0) { + pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, " + "give up\n", controller->trio_index, + controller->mac); + + continue; + } +#endif + + /* + * Configure a Mem-Map region for each memory controller so + * that Linux can map all of its PA space to the PCI bus. + * Use the IOMMU to handle hash-for-home memory. 
+ */ + for_each_online_node(j) { + unsigned long start_pfn = node_start_pfn[j]; + unsigned long end_pfn = node_end_pfn[j]; + unsigned long nr_pages = end_pfn - start_pfn; + + ret = gxio_trio_alloc_memory_maps(trio_context, 1, 0, + 0); + if (ret < 0) { + pr_err("PCI: Mem-Map alloc failure on TRIO %d " + "mac %d for MC %d, give up\n", + controller->trio_index, + controller->mac, j); + + goto alloc_mem_map_failed; + } + + controller->mem_maps[j] = ret; + + /* + * Initialize the Mem-Map and the I/O MMU so that all + * the physical memory can be accessed by the endpoint + * devices. The base bus address is set to the base CPA + * of this memory controller plus an offset (see pci.h). + * The region's base VA is set to the base CPA. The + * I/O MMU table essentially translates the CPA to + * the real PA. Implicitly, for node 0, we create + * a separate Mem-Map region that serves as the inbound + * window for legacy 32-bit devices. This is a direct + * map of the low 4GB CPA space. + */ + ret = gxio_trio_init_memory_map_mmu_aux(trio_context, + controller->mem_maps[j], + start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT, + trio_context->asid, + controller->mac, + (start_pfn << PAGE_SHIFT) + + TILE_PCI_MEM_MAP_BASE_OFFSET, + j, + GXIO_TRIO_ORDER_MODE_UNORDERED); + if (ret < 0) { + pr_err("PCI: Mem-Map init failure on TRIO %d " + "mac %d for MC %d, give up\n", + controller->trio_index, + controller->mac, j); + + goto alloc_mem_map_failed; + } + continue; + +alloc_mem_map_failed: + break; + } + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* No bus fixups needed. */ +void pcibios_fixup_bus(struct pci_bus *bus) +{ +} + +/* Process any "pci=" kernel boot arguments. */ +char *__init pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } + return str; +} + +/* + * Called for each device after PCI setup is done. 
+ * We initialize the PCI device capabilities conservatively, assuming that + * all devices can only address the 32-bit DMA space. The exception here is + * that the device dma_offset is set to the value that matches the 64-bit + * capable devices. This is OK because dma_offset is not used by legacy + * dma_ops, nor by the hybrid dma_ops's streaming DMAs, which are 64-bit ops. + * This implementation matches the kernel design of setting PCI devices' + * coherent_dma_mask to 0xffffffffull by default, allowing the device drivers + * to skip calling pci_set_consistent_dma_mask(DMA_BIT_MASK(32)). + */ +static void pcibios_fixup_final(struct pci_dev *pdev) +{ + set_dma_ops(&pdev->dev, gx_legacy_pci_dma_map_ops); + set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET); + pdev->dev.archdata.max_direct_dma_addr = + TILE_PCI_MAX_DIRECT_DMA_ADDRESS; + pdev->dev.coherent_dma_mask = TILE_PCI_MAX_DIRECT_DMA_ADDRESS; +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final); + +/* Map a PCI MMIO bus address into VA space. */ +void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) +{ + struct pci_controller *controller = NULL; + resource_size_t bar_start; + resource_size_t bar_end; + resource_size_t offset; + resource_size_t start; + resource_size_t end; + int trio_fd; + int i; + + start = phys_addr; + end = phys_addr + size - 1; + + /* + * By searching phys_addr in each controller's mem_space, we can + * determine the controller that should accept the PCI memory access. + */ + for (i = 0; i < num_rc_controllers; i++) { + /* + * Skip controllers that are not properly initialized or + * have down links. 
+ */ + if (pci_controllers[i].root_bus == NULL) + continue; + + bar_start = pci_controllers[i].mem_space.start; + bar_end = pci_controllers[i].mem_space.end; + + if ((start >= bar_start) && (end <= bar_end)) { + controller = &pci_controllers[i]; + break; + } + } + + if (controller == NULL) + return NULL; + + trio_fd = controller->trio->fd; + + /* Convert the resource start to the bus address offset. */ + start = phys_addr - controller->mem_offset; + + offset = HV_TRIO_PIO_OFFSET(controller->pio_mem_index) + start; + + /* We need to keep the PCI bus address's in-page offset in the VA. */ + return iorpc_ioremap(trio_fd, offset, size) + + (start & (PAGE_SIZE - 1)); +} +EXPORT_SYMBOL(ioremap); + +#ifdef CONFIG_TILE_PCI_IO +/* Map a PCI I/O address into VA space. */ +void __iomem *ioport_map(unsigned long port, unsigned int size) +{ + struct pci_controller *controller = NULL; + resource_size_t bar_start; + resource_size_t bar_end; + resource_size_t offset; + resource_size_t start; + resource_size_t end; + int trio_fd; + int i; + + start = port; + end = port + size - 1; + + /* + * By searching the port in each controller's io_space, we can + * determine the controller that should accept the PCI I/O access. + */ + for (i = 0; i < num_rc_controllers; i++) { + /* + * Skip controllers that are not properly initialized or + * have down links. + */ + if (pci_controllers[i].root_bus == NULL) + continue; + + bar_start = pci_controllers[i].io_space.start; + bar_end = pci_controllers[i].io_space.end; + + if ((start >= bar_start) && (end <= bar_end)) { + controller = &pci_controllers[i]; + break; + } + } + + if (controller == NULL) + return NULL; + + trio_fd = controller->trio->fd; + + /* Convert the resource start to the bus address offset. */ + port -= controller->io_space.start; + + offset = HV_TRIO_PIO_OFFSET(controller->pio_io_index) + port; + + /* We need to keep the PCI bus address's in-page offset in the VA. 
*/ + return iorpc_ioremap(trio_fd, offset, size) + (port & (PAGE_SIZE - 1)); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(ioport_unmap); +#endif + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc. + * + * devfn is the combined PCI device & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & device. + */ +static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset, + int size, u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + gxio_trio_context_t *trio_context = controller->trio; + int busnum = bus->number & 0xff; + int device = PCI_SLOT(devfn); + int function = PCI_FUNC(devfn); + int config_type = 1; + TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr; + void *mmio_addr; + + /* + * Map all accesses to the local device on root bus into the + * MMIO space of the MAC. Accesses to the downstream devices + * go to the PIO space. + */ + if (pci_is_root_bus(bus)) { + if (device == 0) { + /* + * This is the internal downstream P2P bridge, + * access directly. + */ + unsigned int reg_offset; + + reg_offset = ((offset & 0xFFF) << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED + << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (controller->mac << + TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + mmio_addr = trio_context->mmio_base_mac + reg_offset; + + goto valid_device; + + } else { + /* + * We fake an empty device for (device > 0), + * since there is only one device on bus 0. 
+ */ + goto invalid_device; + } + } + + /* + * Accesses to the directly attached device have to be + * sent as type-0 configs. + */ + if (busnum == (controller->first_busno + 1)) { + /* + * There is only one device off of our built-in P2P bridge. + */ + if (device != 0) + goto invalid_device; + + config_type = 0; + } + + cfg_addr.word = 0; + cfg_addr.reg_addr = (offset & 0xFFF); + cfg_addr.fn = function; + cfg_addr.dev = device; + cfg_addr.bus = busnum; + cfg_addr.type = config_type; + + /* + * Note that we don't set the mac field in cfg_addr because the + * mapping is per port. + */ + mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] + + cfg_addr.word; + +valid_device: + + switch (size) { + case 4: + *val = __gxio_mmio_read32(mmio_addr); + break; + + case 2: + *val = __gxio_mmio_read16(mmio_addr); + break; + + case 1: + *val = __gxio_mmio_read8(mmio_addr); + break; + + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + TRACE_CFG_RD(size, *val, busnum, device, function, offset); + + return 0; + +invalid_device: + + switch (size) { + case 4: + *val = 0xFFFFFFFF; + break; + + case 2: + *val = 0xFFFF; + break; + + case 1: + *val = 0xFF; + break; + + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return 0; +} + + +/* + * See tile_cfg_read() for relevent comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset, + int size, u32 val) +{ + struct pci_controller *controller = bus->sysdata; + gxio_trio_context_t *trio_context = controller->trio; + int busnum = bus->number & 0xff; + int device = PCI_SLOT(devfn); + int function = PCI_FUNC(devfn); + int config_type = 1; + TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR_t cfg_addr; + void *mmio_addr; + u32 val_32 = (u32)val; + u16 val_16 = (u16)val; + u8 val_8 = (u8)val; + + /* + * Map all accesses to the local device on root bus into the + * MMIO space of the MAC. 
Accesses to the downstream devices + * go to the PIO space. + */ + if (pci_is_root_bus(bus)) { + if (device == 0) { + /* + * This is the internal downstream P2P bridge, + * access directly. + */ + unsigned int reg_offset; + + reg_offset = ((offset & 0xFFF) << + TRIO_CFG_REGION_ADDR__REG_SHIFT) | + (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_PROTECTED + << TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) | + (controller->mac << + TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT); + + mmio_addr = trio_context->mmio_base_mac + reg_offset; + + goto valid_device; + + } else { + /* + * We fake an empty device for (device > 0), + * since there is only one device on bus 0. + */ + goto invalid_device; + } + } + + /* + * Accesses to the directly attached device have to be + * sent as type-0 configs. + */ + if (busnum == (controller->first_busno + 1)) { + /* + * There is only one device off of our built-in P2P bridge. + */ + if (device != 0) + goto invalid_device; + + config_type = 0; + } + + cfg_addr.word = 0; + cfg_addr.reg_addr = (offset & 0xFFF); + cfg_addr.fn = function; + cfg_addr.dev = device; + cfg_addr.bus = busnum; + cfg_addr.type = config_type; + + /* + * Note that we don't set the mac field in cfg_addr because the + * mapping is per port. + */ + mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] + + cfg_addr.word; + +valid_device: + + switch (size) { + case 4: + __gxio_mmio_write32(mmio_addr, val_32); + TRACE_CFG_WR(size, val_32, busnum, device, function, offset); + break; + + case 2: + __gxio_mmio_write16(mmio_addr, val_16); + TRACE_CFG_WR(size, val_16, busnum, device, function, offset); + break; + + case 1: + __gxio_mmio_write8(mmio_addr, val_8); + TRACE_CFG_WR(size, val_8, busnum, device, function, offset); + break; + + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + +invalid_device: + + return 0; +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* MSI support starts here. 
*/ +static unsigned int tilegx_msi_startup(struct irq_data *d) +{ + if (d->msi_desc) + unmask_msi_irq(d); + + return 0; +} + +static void tilegx_msi_ack(struct irq_data *d) +{ + __insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq); +} + +static void tilegx_msi_mask(struct irq_data *d) +{ + mask_msi_irq(d); + __insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq); +} + +static void tilegx_msi_unmask(struct irq_data *d) +{ + __insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq); + unmask_msi_irq(d); +} + +static struct irq_chip tilegx_msi_chip = { + .name = "tilegx_msi", + .irq_startup = tilegx_msi_startup, + .irq_ack = tilegx_msi_ack, + .irq_mask = tilegx_msi_mask, + .irq_unmask = tilegx_msi_unmask, + + /* TBD: support set_affinity. */ +}; + +int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +{ + struct pci_controller *controller; + gxio_trio_context_t *trio_context; + struct msi_msg msg; + int default_irq; + uint64_t mem_map_base; + uint64_t mem_map_limit; + u64 msi_addr; + int mem_map; + int cpu; + int irq; + int ret; + + irq = irq_alloc_hwirq(-1); + if (!irq) + return -ENOSPC; + + /* + * Since we use a 64-bit Mem-Map to accept the MSI write, we fail + * devices that are not capable of generating a 64-bit message address. + * These devices will fall back to using the legacy interrupts. + * Most PCIe endpoint devices do support 64-bit message addressing. + */ + if (desc->msi_attrib.is_64 == 0) { + dev_printk(KERN_INFO, &pdev->dev, + "64-bit MSI message address not supported, " + "falling back to legacy interrupts.\n"); + + ret = -ENOMEM; + goto is_64_failure; + } + + default_irq = desc->msi_attrib.default_irq; + controller = irq_get_handler_data(default_irq); + + BUG_ON(!controller); + + trio_context = controller->trio; + + /* + * Allocate a scatter-queue that will accept the MSI write and + * trigger the TILE-side interrupts. We use the scatter-queue regions + * before the mem map regions, because the latter are needed by more + * applications. 
+ */ + mem_map = gxio_trio_alloc_scatter_queues(trio_context, 1, 0, 0); + if (mem_map >= 0) { + TRIO_MAP_SQ_DOORBELL_FMT_t doorbell_template = {{ + .pop = 0, + .doorbell = 1, + }}; + + mem_map += TRIO_NUM_MAP_MEM_REGIONS; + mem_map_base = MEM_MAP_INTR_REGIONS_BASE + + mem_map * MEM_MAP_INTR_REGION_SIZE; + mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1; + + msi_addr = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 8; + msg.data = (unsigned int)doorbell_template.word; + } else { + /* SQ regions are out, allocate from map mem regions. */ + mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0); + if (mem_map < 0) { + dev_printk(KERN_INFO, &pdev->dev, + "%s Mem-Map alloc failure. " + "Failed to initialize MSI interrupts. " + "Falling back to legacy interrupts.\n", + desc->msi_attrib.is_msix ? "MSI-X" : "MSI"); + ret = -ENOMEM; + goto msi_mem_map_alloc_failure; + } + + mem_map_base = MEM_MAP_INTR_REGIONS_BASE + + mem_map * MEM_MAP_INTR_REGION_SIZE; + mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1; + + msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - + TRIO_MAP_MEM_REG_INT0; + + msg.data = mem_map; + } + + /* We try to distribute different IRQs to different tiles. */ + cpu = tile_irq_cpu(irq); + + /* + * Now call up to the HV to configure the MSI interrupt and + * set up the IPI binding. 
+ */ + ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu), + KERNEL_PL, irq, controller->mac, + mem_map, mem_map_base, mem_map_limit, + trio_context->asid); + if (ret < 0) { + dev_printk(KERN_INFO, &pdev->dev, "HV MSI config failed.\n"); + + goto hv_msi_config_failure; + } + + irq_set_msi_desc(irq, desc); + + msg.address_hi = msi_addr >> 32; + msg.address_lo = msi_addr & 0xffffffff; + + write_msi_msg(irq, &msg); + irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq); + irq_set_handler_data(irq, controller); + + return 0; + +hv_msi_config_failure: + /* Free mem-map */ +msi_mem_map_alloc_failure: +is_64_failure: + irq_free_hwirq(irq); + return ret; +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + irq_free_hwirq(irq); +} diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c new file mode 100644 index 00000000000..2bf6c9c135c --- /dev/null +++ b/arch/tile/kernel/perf_event.c @@ -0,0 +1,1005 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * Perf_events support for Tile processor. 
+ * + * This code is based upon the x86 perf event + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> + * Copyright (C) 2009 Google, Inc., Stephane Eranian + */ + +#include <linux/kprobes.h> +#include <linux/kernel.h> +#include <linux/kdebug.h> +#include <linux/mutex.h> +#include <linux/bitmap.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/perf_event.h> +#include <linux/atomic.h> +#include <asm/traps.h> +#include <asm/stack.h> +#include <asm/pmc.h> +#include <hv/hypervisor.h> + +#define TILE_MAX_COUNTERS 4 + +#define PERF_COUNT_0_IDX 0 +#define PERF_COUNT_1_IDX 1 +#define AUX_PERF_COUNT_0_IDX 2 +#define AUX_PERF_COUNT_1_IDX 3 + +struct cpu_hw_events { + int n_events; + struct perf_event *events[TILE_MAX_COUNTERS]; /* counter order */ + struct perf_event *event_list[TILE_MAX_COUNTERS]; /* enabled + order */ + int assign[TILE_MAX_COUNTERS]; + unsigned long active_mask[BITS_TO_LONGS(TILE_MAX_COUNTERS)]; + unsigned long used_mask; +}; + +/* TILE arch specific performance monitor unit */ +struct tile_pmu { + const char *name; + int version; + const int *hw_events; /* generic hw events table */ + /* generic hw cache events table */ + const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + int (*map_hw_event)(u64); /*method used to map + hw events */ + int (*map_cache_event)(u64); /*method used to map + cache events */ + + u64 max_period; /* max sampling period */ + u64 cntval_mask; /* counter width mask */ + int cntval_bits; /* counter width */ + int max_events; /* max generic hw events + in map */ + int num_counters; /* number base + 
aux counters */ + int num_base_counters; /* number base counters */ +}; + +DEFINE_PER_CPU(u64, perf_irqs); +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +#define TILE_OP_UNSUPP (-1) + +#ifndef __tilegx__ +/* TILEPro hardware events map */ +static const int tile_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x01, /* ONE */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0x06, /* MP_BUNDLE_RETIRED */ + [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, + [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x16, /* + MP_CONDITIONAL_BRANCH_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x14, /* + MP_CONDITIONAL_BRANCH_MISSPREDICT */ + [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, +}; +#else +/* TILEGx hardware events map */ +static const int tile_hw_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x181, /* ONE */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0xdb, /* INSTRUCTION_BUNDLE */ + [PERF_COUNT_HW_CACHE_REFERENCES] = TILE_OP_UNSUPP, + [PERF_COUNT_HW_CACHE_MISSES] = TILE_OP_UNSUPP, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xd9, /* + COND_BRANCH_PRED_CORRECT */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0xda, /* + COND_BRANCH_PRED_INCORRECT */ + [PERF_COUNT_HW_BUS_CYCLES] = TILE_OP_UNSUPP, +}; +#endif + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of -1 means + * 'not supported', any other value means the + * raw hw_event ID. 
+ */ +#ifndef __tilegx__ +/* TILEPro hardware cache event map */ +static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0x21, /* RD_MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0x22, /* WR_MISS */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x12, /* MP_ICACHE_HIT_ISSUED */ + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x1d, /* TLB_CNT */ + [C(RESULT_MISS)] = 0x20, /* TLB_EXCEPTION */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x13, /* MP_ITLB_HIT_ISSUED */ + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] 
= { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +}; +#else +/* TILEGx hardware events map */ +static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0x44, /* RD_MISS */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0x45, /* WR_MISS */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ + [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x40, /* TLB_CNT */ + [C(RESULT_MISS)] = 0x43, /* TLB_EXCEPTION */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] 
= TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = 0xd4, /* ITLB_MISS_INT */ + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = TILE_OP_UNSUPP, + [C(RESULT_MISS)] = TILE_OP_UNSUPP, + }, +}, +}; +#endif + +static atomic_t tile_active_events; +static DEFINE_MUTEX(perf_intr_reserve_mutex); + +static int tile_map_hw_event(u64 config); +static int tile_map_cache_event(u64 config); + +static int tile_pmu_handle_irq(struct pt_regs *regs, int fault); + +/* + * To avoid new_raw_count getting larger then pre_raw_count + * in tile_perf_event_update(), we limit the value of max_period to 2^31 - 1. + */ +static const struct tile_pmu tilepmu = { +#ifndef __tilegx__ + .name = "tilepro", +#else + .name = "tilegx", +#endif + .max_events = ARRAY_SIZE(tile_hw_event_map), + .map_hw_event = tile_map_hw_event, + .hw_events = tile_hw_event_map, + .map_cache_event = tile_map_cache_event, + .cache_events = &tile_cache_event_map, + .cntval_bits = 32, + .cntval_mask = (1ULL << 32) - 1, + .max_period = (1ULL << 31) - 1, + .num_counters = TILE_MAX_COUNTERS, + .num_base_counters = TILE_BASE_COUNTERS, +}; + +static const struct tile_pmu *tile_pmu __read_mostly; + +/* + * Check whether perf event is enabled. + */ +int tile_perf_enabled(void) +{ + return atomic_read(&tile_active_events) != 0; +} + +/* + * Read Performance Counters. 
+ */ +static inline u64 read_counter(int idx) +{ + u64 val = 0; + + /* __insn_mfspr() only takes an immediate argument */ + switch (idx) { + case PERF_COUNT_0_IDX: + val = __insn_mfspr(SPR_PERF_COUNT_0); + break; + case PERF_COUNT_1_IDX: + val = __insn_mfspr(SPR_PERF_COUNT_1); + break; + case AUX_PERF_COUNT_0_IDX: + val = __insn_mfspr(SPR_AUX_PERF_COUNT_0); + break; + case AUX_PERF_COUNT_1_IDX: + val = __insn_mfspr(SPR_AUX_PERF_COUNT_1); + break; + default: + WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || + idx < PERF_COUNT_0_IDX); + } + + return val; +} + +/* + * Write Performance Counters. + */ +static inline void write_counter(int idx, u64 value) +{ + /* __insn_mtspr() only takes an immediate argument */ + switch (idx) { + case PERF_COUNT_0_IDX: + __insn_mtspr(SPR_PERF_COUNT_0, value); + break; + case PERF_COUNT_1_IDX: + __insn_mtspr(SPR_PERF_COUNT_1, value); + break; + case AUX_PERF_COUNT_0_IDX: + __insn_mtspr(SPR_AUX_PERF_COUNT_0, value); + break; + case AUX_PERF_COUNT_1_IDX: + __insn_mtspr(SPR_AUX_PERF_COUNT_1, value); + break; + default: + WARN_ON_ONCE(idx > AUX_PERF_COUNT_1_IDX || + idx < PERF_COUNT_0_IDX); + } +} + +/* + * Enable performance event by setting + * Performance Counter Control registers. 
+ */ +static inline void tile_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned long cfg, mask; + int shift, idx = hwc->idx; + + /* + * prevent early activation from tile_pmu_start() in hw_perf_enable + */ + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (idx < tile_pmu->num_base_counters) + cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); + else + cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); + + switch (idx) { + case PERF_COUNT_0_IDX: + case AUX_PERF_COUNT_0_IDX: + mask = TILE_EVENT_MASK; + shift = 0; + break; + case PERF_COUNT_1_IDX: + case AUX_PERF_COUNT_1_IDX: + mask = TILE_EVENT_MASK << 16; + shift = 16; + break; + default: + WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || + idx > AUX_PERF_COUNT_1_IDX); + return; + } + + /* Clear mask bits to enable the event. */ + cfg &= ~mask; + cfg |= hwc->config << shift; + + if (idx < tile_pmu->num_base_counters) + __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); + else + __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); +} + +/* + * Disable performance event by clearing + * Performance Counter Control registers. + */ +static inline void tile_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned long cfg, mask; + int idx = hwc->idx; + + if (idx == -1) + return; + + if (idx < tile_pmu->num_base_counters) + cfg = __insn_mfspr(SPR_PERF_COUNT_CTL); + else + cfg = __insn_mfspr(SPR_AUX_PERF_COUNT_CTL); + + switch (idx) { + case PERF_COUNT_0_IDX: + case AUX_PERF_COUNT_0_IDX: + mask = TILE_PLM_MASK; + break; + case PERF_COUNT_1_IDX: + case AUX_PERF_COUNT_1_IDX: + mask = TILE_PLM_MASK << 16; + break; + default: + WARN_ON_ONCE(idx < PERF_COUNT_0_IDX || + idx > AUX_PERF_COUNT_1_IDX); + return; + } + + /* Set mask bits to disable the event. */ + cfg |= mask; + + if (idx < tile_pmu->num_base_counters) + __insn_mtspr(SPR_PERF_COUNT_CTL, cfg); + else + __insn_mtspr(SPR_AUX_PERF_COUNT_CTL, cfg); +} + +/* + * Propagate event elapsed time into the generic event. 
+ * Can only be executed on the CPU where the event is active. + * Returns the delta events processed. + */ +static u64 tile_perf_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - tile_pmu->cntval_bits; + u64 prev_raw_count, new_raw_count; + u64 oldval; + int idx = hwc->idx; + u64 delta; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = read_counter(idx); + + oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count); + if (oldval != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +/* + * Set the next IRQ period, based on the hwc->period_left value. 
+ * To be called with the event disabled in hw: + */ +static int tile_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > tile_pmu->max_period) + left = tile_pmu->max_period; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); + + write_counter(idx, (u64)(-left) & tile_pmu->cntval_mask); + + perf_event_update_userpage(event); + + return ret; +} + +/* + * Stop the event but do not release the PMU counter + */ +static void tile_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (__test_and_clear_bit(idx, cpuc->active_mask)) { + tile_pmu_disable_event(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + tile_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +/* + * Start an event (without re-assigning counter) + */ +static void tile_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = event->hw.idx; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + 
return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + tile_event_set_period(event); + } + + event->hw.state = 0; + + cpuc->events[idx] = event; + __set_bit(idx, cpuc->active_mask); + + unmask_pmc_interrupts(); + + tile_pmu_enable_event(event); + + perf_event_update_userpage(event); +} + +/* + * Add a single event to the PMU. + * + * The event is added to the group of enabled events + * but only if it can be scehduled with existing events. + */ +static int tile_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc; + unsigned long mask; + int b, max_cnt; + + hwc = &event->hw; + + /* + * We are full. + */ + if (cpuc->n_events == tile_pmu->num_counters) + return -ENOSPC; + + cpuc->event_list[cpuc->n_events] = event; + cpuc->n_events++; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + /* + * Find first empty counter. + */ + max_cnt = tile_pmu->num_counters; + mask = ~cpuc->used_mask; + + /* Find next free counter. */ + b = find_next_bit(&mask, max_cnt, 0); + + /* Should not happen. */ + if (WARN_ON_ONCE(b == max_cnt)) + return -ENOSPC; + + /* + * Assign counter to event. + */ + event->hw.idx = b; + __set_bit(b, &cpuc->used_mask); + + /* + * Start if requested. + */ + if (flags & PERF_EF_START) + tile_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +/* + * Delete a single event from the PMU. + * + * The event is deleted from the group of enabled events. + * If it is the last event, disable PMU interrupt. + */ +static void tile_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + /* + * Remove event from list, compact list if necessary. 
+ */ + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event_list[i] == event) { + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + --cpuc->n_events; + cpuc->events[event->hw.idx] = NULL; + __clear_bit(event->hw.idx, &cpuc->used_mask); + tile_pmu_stop(event, PERF_EF_UPDATE); + break; + } + } + /* + * If there are no events left, then mask PMU interrupt. + */ + if (cpuc->n_events == 0) + mask_pmc_interrupts(); + perf_event_update_userpage(event); +} + +/* + * Propagate event elapsed time into the event. + */ +static inline void tile_pmu_read(struct perf_event *event) +{ + tile_perf_event_update(event); +} + +/* + * Map generic events to Tile PMU. + */ +static int tile_map_hw_event(u64 config) +{ + if (config >= tile_pmu->max_events) + return -EINVAL; + return tile_pmu->hw_events[config]; +} + +/* + * Map generic hardware cache events to Tile PMU. + */ +static int tile_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + int code; + + if (!tile_pmu->cache_events) + return -ENOENT; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + code = (*tile_pmu->cache_events)[cache_type][cache_op][cache_result]; + if (code == TILE_OP_UNSUPP) + return -EINVAL; + + return code; +} + +static void tile_event_destroy(struct perf_event *event) +{ + if (atomic_dec_return(&tile_active_events) == 0) + release_pmc_hardware(); +} + +static int __tile_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int code; + + switch (attr->type) { + case PERF_TYPE_HARDWARE: + code = tile_pmu->map_hw_event(attr->config); + break; + case PERF_TYPE_HW_CACHE: + code = 
tile_pmu->map_cache_event(attr->config); + break; + case PERF_TYPE_RAW: + code = attr->config & TILE_EVENT_MASK; + break; + default: + /* Should not happen. */ + return -EOPNOTSUPP; + } + + if (code < 0) + return code; + + hwc->config = code; + hwc->idx = -1; + + if (attr->exclude_user) + hwc->config |= TILE_CTL_EXCL_USER; + + if (attr->exclude_kernel) + hwc->config |= TILE_CTL_EXCL_KERNEL; + + if (attr->exclude_hv) + hwc->config |= TILE_CTL_EXCL_HV; + + if (!hwc->sample_period) { + hwc->sample_period = tile_pmu->max_period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + event->destroy = tile_event_destroy; + return 0; +} + +static int tile_event_init(struct perf_event *event) +{ + int err = 0; + perf_irq_t old_irq_handler = NULL; + + if (atomic_inc_return(&tile_active_events) == 1) + old_irq_handler = reserve_pmc_hardware(tile_pmu_handle_irq); + + if (old_irq_handler) { + pr_warn("PMC hardware busy (reserved by oprofile)\n"); + + atomic_dec(&tile_active_events); + return -EBUSY; + } + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __tile_event_init(event); + if (err) { + if (event->destroy) + event->destroy(event); + } + return err; +} + +static struct pmu tilera_pmu = { + .event_init = tile_event_init, + .add = tile_pmu_add, + .del = tile_pmu_del, + + .start = tile_pmu_start, + .stop = tile_pmu_stop, + + .read = tile_pmu_read, +}; + +/* + * PMU's IRQ handler, PMU has 2 interrupts, they share the same handler. 
+ */ +int tile_pmu_handle_irq(struct pt_regs *regs, int fault) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + struct hw_perf_event *hwc; + u64 val; + unsigned long status; + int bit; + + __get_cpu_var(perf_irqs)++; + + if (!atomic_read(&tile_active_events)) + return 0; + + status = pmc_get_overflow(); + pmc_ack_overflow(status); + + for_each_set_bit(bit, &status, tile_pmu->num_counters) { + + event = cpuc->events[bit]; + + if (!event) + continue; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + hwc = &event->hw; + + val = tile_perf_event_update(event); + if (val & (1ULL << (tile_pmu->cntval_bits - 1))) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + if (!tile_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + tile_pmu_stop(event, 0); + } + + return 0; +} + +static bool __init supported_pmu(void) +{ + tile_pmu = &tilepmu; + return true; +} + +int __init init_hw_perf_events(void) +{ + supported_pmu(); + perf_pmu_register(&tilera_pmu, "cpu", PERF_TYPE_RAW); + return 0; +} +arch_initcall(init_hw_perf_events); + +/* Callchain handling code. */ + +/* + * Tile specific backtracing code for perf_events. + */ +static inline void perf_callchain(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + struct KBacktraceIterator kbt; + unsigned int i; + + /* + * Get the address just after the "jalr" instruction that + * jumps to the handler for a syscall. When we find this + * address in a backtrace, we silently ignore it, which gives + * us a one-step backtrace connection from the sys_xxx() + * function in the kernel to the xxx() function in libc. + * Otherwise, we lose the ability to properly attribute time + * from the libc calls to the kernel implementations, since + * oprofile only considers PCs from backtraces a pair at a time. 
+ */ + unsigned long handle_syscall_pc = handle_syscall_link_address(); + + KBacktraceIterator_init(&kbt, NULL, regs); + kbt.profile = 1; + + /* + * The sample for the pc is already recorded. Now we are adding the + * address of the callsites on the stack. Our iterator starts + * with the frame of the (already sampled) call site. If our + * iterator contained a "return address" field, we could have just + * used it and wouldn't have needed to skip the first + * frame. That's in effect what the arm and x86 versions do. + * Instead we peel off the first iteration to get the equivalent + * behavior. + */ + + if (KBacktraceIterator_end(&kbt)) + return; + KBacktraceIterator_next(&kbt); + + /* + * Set stack depth to 16 for user and kernel space respectively, that + * is, total 32 stack frames. + */ + for (i = 0; i < 16; ++i) { + unsigned long pc; + if (KBacktraceIterator_end(&kbt)) + break; + pc = kbt.it.pc; + if (pc != handle_syscall_pc) + perf_callchain_store(entry, pc); + KBacktraceIterator_next(&kbt); + } +} + +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + perf_callchain(entry, regs); +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + perf_callchain(entry, regs); +} diff --git a/arch/tile/kernel/pmc.c b/arch/tile/kernel/pmc.c new file mode 100644 index 00000000000..db62cc34b95 --- /dev/null +++ b/arch/tile/kernel/pmc.c @@ -0,0 +1,121 @@ +/* + * Copyright 2014 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <linux/atomic.h> +#include <linux/interrupt.h> + +#include <asm/processor.h> +#include <asm/pmc.h> + +perf_irq_t perf_irq = NULL; +int handle_perf_interrupt(struct pt_regs *regs, int fault) +{ + int retval; + + if (!perf_irq) + panic("Unexpected PERF_COUNT interrupt %d\n", fault); + + nmi_enter(); + retval = perf_irq(regs, fault); + nmi_exit(); + return retval; +} + +/* Reserve PMC hardware if it is available. */ +perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq) +{ + return cmpxchg(&perf_irq, NULL, new_perf_irq); +} +EXPORT_SYMBOL(reserve_pmc_hardware); + +/* Release PMC hardware. */ +void release_pmc_hardware(void) +{ + perf_irq = NULL; +} +EXPORT_SYMBOL(release_pmc_hardware); + + +/* + * Get current overflow status of each performance counter, + * and auxiliary performance counter. + */ +unsigned long +pmc_get_overflow(void) +{ + unsigned long status; + + /* + * merge base+aux into a single vector + */ + status = __insn_mfspr(SPR_PERF_COUNT_STS); + status |= __insn_mfspr(SPR_AUX_PERF_COUNT_STS) << TILE_BASE_COUNTERS; + return status; +} + +/* + * Clear the status bit for the corresponding counter, if written + * with a one. + */ +void +pmc_ack_overflow(unsigned long status) +{ + /* + * clear overflow status by writing ones + */ + __insn_mtspr(SPR_PERF_COUNT_STS, status); + __insn_mtspr(SPR_AUX_PERF_COUNT_STS, status >> TILE_BASE_COUNTERS); +} + +/* + * The perf count interrupts are masked and unmasked explicitly, + * and only here. The normal irq_enable() does not enable them, + * and irq_disable() does not disable them. That lets these + * routines drive the perf count interrupts orthogonally. + * + * We also mask the perf count interrupts on entry to the perf count + * interrupt handler in assembly code, and by default unmask them + * again (with interrupt critical section protection) just before + * returning from the interrupt. 
If the perf count handler returns + * a non-zero error code, then we don't re-enable them before returning. + * + * For Pro, we rely on both interrupts being in the same word to update + * them atomically so we never have one enabled and one disabled. + */ + +#if CHIP_HAS_SPLIT_INTR_MASK() +# if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 +# error Fix assumptions about which word PERF_COUNT interrupts are in +# endif +#endif + +static inline unsigned long long pmc_mask(void) +{ + unsigned long long mask = 1ULL << INT_PERF_COUNT; + mask |= 1ULL << INT_AUX_PERF_COUNT; + return mask; +} + +void unmask_pmc_interrupts(void) +{ + interrupt_mask_reset_mask(pmc_mask()); +} + +void mask_pmc_interrupts(void) +{ + interrupt_mask_set_mask(pmc_mask()); +} diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 2e02c41ddf3..6829a950864 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -22,11 +22,14 @@ #include <linux/proc_fs.h> #include <linux/sysctl.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <linux/mman.h> +#include <asm/unaligned.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/sections.h> #include <asm/homecache.h> +#include <asm/hardwall.h> #include <arch/chip.h> @@ -88,3 +91,71 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +static struct ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + 
.procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static struct ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ + register_sysctl_paths(tile_path, unaligned_table); + return 0; +} + +arch_initcall(proc_sys_tile_init); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index d0065103eb7..16ed5894875 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -25,23 +25,27 @@ #include <linux/hardirq.h> #include <linux/syscalls.h> #include <linux/kernel.h> -#include <asm/system.h> +#include <linux/tracehook.h> +#include <linux/signal.h> #include <asm/stack.h> +#include <asm/switch_to.h> #include <asm/homecache.h> #include <asm/syscalls.h> +#include <asm/traps.h> +#include <asm/setup.h> +#include <asm/uaccess.h> #ifdef CONFIG_HARDWALL #include <asm/hardwall.h> #endif #include <arch/chip.h> #include <arch/abi.h> - +#include <arch/sim_def.h> /* * Use the (x86) "idle=poll" option to prefer low latency when leaving the * idle loop over low power while in the idle loop, e.g. if we have * one thread per core and we want to get threads out of futex waits fast. */ -static int no_idle_nap; static int __init idle_setup(char *str) { if (!str) @@ -49,103 +53,28 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { pr_info("using polling idle threads.\n"); - no_idle_nap = 1; - } else if (!strcmp(str, "halt")) - no_idle_nap = 0; - else - return -1; - - return 0; -} -early_param("idle", idle_setup); - -/* - * The idle thread. 
There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - int cpu = smp_processor_id(); - - - current_thread_info()->status |= TS_POLLING; - - if (no_idle_nap) { - while (1) { - while (!need_resched()) - cpu_relax(); - schedule(); - } - } - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_stop_sched_tick(1); - while (!need_resched()) { - if (cpu_is_offline(cpu)) - BUG(); /* no HOTPLUG_CPU */ - - local_irq_disable(); - __get_cpu_var(irq_stat).idle_timestamp = jiffies; - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - - if (!need_resched()) - _cpu_idle(); - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - } - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + cpu_idle_poll_ctrl(true); + return 0; + } else if (!strcmp(str, "halt")) { + return 0; } + return -1; } +early_param("idle", idle_setup); -struct thread_info *alloc_thread_info_node(struct task_struct *task, int node) +void arch_cpu_idle(void) { - struct page *page; - gfp_t flags = GFP_KERNEL; - -#ifdef CONFIG_DEBUG_STACK_USAGE - flags |= __GFP_ZERO; -#endif - - page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER); - if (!page) - return NULL; - - return (struct thread_info *)page_address(page); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; + _cpu_idle(); } /* - * Free a thread_info node, and all of its derivative - * data structures. 
+ * Release a thread_info structure */ -void free_thread_info(struct thread_info *info) +void arch_release_thread_info(struct thread_info *info) { struct single_step_state *step_state = info->step_state; -#ifdef CONFIG_HARDWALL - /* - * We free a thread_info from the context of the task that has - * been scheduled next, so the original task is already dead. - * Calling deactivate here just frees up the data structures. - * If the task we're freeing held the last reference to a - * hardwall fd, it would have been released prior to this point - * anyway via exit_files(), and "hardwall" would be NULL by now. - */ - if (info->task->thread.hardwall) - hardwall_deactivate(info->task); -#endif - if (step_state) { /* @@ -164,31 +93,48 @@ void free_thread_info(struct thread_info *info) */ kfree(step_state); } - - free_pages((unsigned long)info, THREAD_SIZE_ORDER); } static void save_arch_state(struct thread_struct *t); int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long stack_size, - struct task_struct *p, struct pt_regs *regs) + unsigned long arg, struct task_struct *p) { - struct pt_regs *childregs; + struct pt_regs *childregs = task_pt_regs(p); unsigned long ksp; + unsigned long *callee_regs; /* - * When creating a new kernel thread we pass sp as zero. - * Assign it to a reasonable value now that we have the stack. + * Set up the stack and stack pointer appropriately for the + * new child to find itself woken up in __switch_to(). + * The callee-saved registers must be on the stack to be read; + * the new task will then jump to assembly support to handle + * calling schedule_tail(), etc., and (for userspace tasks) + * returning to the context set up in the pt_regs. 
*/ - if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0)) - sp = KSTK_TOP(p); + ksp = (unsigned long) childregs; + ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long); + callee_regs = (unsigned long *)ksp; + ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */ + ((long *)ksp)[0] = ((long *)ksp)[1] = 0; + p->thread.ksp = ksp; - /* - * Do not clone step state from the parent; each thread - * must make its own lazily. - */ - task_thread_info(p)->step_state = NULL; + /* Record the pid of the task that created this one. */ + p->thread.creator_pid = current->pid; + + if (unlikely(p->flags & PF_KTHREAD)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + memset(&callee_regs[2], 0, + (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long)); + callee_regs[0] = sp; /* r30 = function */ + callee_regs[1] = arg; /* r31 = arg */ + childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); + p->thread.pc = (unsigned long) ret_from_kernel_thread; + return 0; + } /* * Start new thread in ret_from_fork so it schedules properly @@ -196,46 +142,41 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, */ p->thread.pc = (unsigned long) ret_from_fork; - /* Save user stack top pointer so we can ID the stack vm area later. */ - p->thread.usp0 = sp; + /* + * Do not clone step state from the parent; each thread + * must make its own lazily. + */ + task_thread_info(p)->step_state = NULL; - /* Record the pid of the process that created this one. */ - p->thread.creator_pid = current->pid; +#ifdef __tilegx__ + /* + * Do not clone unalign jit fixup from the parent; each thread + * must allocate its own on demand. + */ + task_thread_info(p)->unalign_jit_base = NULL; +#endif /* * Copy the registers onto the kernel stack so the * return-from-interrupt code will reload it into registers. 
*/ - childregs = task_pt_regs(p); - *childregs = *regs; + *childregs = *current_pt_regs(); childregs->regs[0] = 0; /* return value is zero */ - childregs->sp = sp; /* override with new user stack pointer */ + if (sp) + childregs->sp = sp; /* override with new user stack pointer */ + memcpy(callee_regs, &childregs->regs[CALLEE_SAVED_FIRST_REG], + CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long)); + + /* Save user stack top pointer so we can ID the stack vm area later. */ + p->thread.usp0 = childregs->sp; /* * If CLONE_SETTLS is set, set "tp" in the new task to "r4", * which is passed in as arg #5 to sys_clone(). */ if (clone_flags & CLONE_SETTLS) - childregs->tp = regs->regs[4]; + childregs->tp = childregs->regs[4]; - /* - * Copy the callee-saved registers from the passed pt_regs struct - * into the context-switch callee-saved registers area. - * This way when we start the interrupt-return sequence, the - * callee-save registers will be correctly in registers, which - * is how we assume the compiler leaves them as we start doing - * the normal return-from-interrupt path after calling C code. - * Zero out the C ABI save area to mark the top of the stack. - */ - ksp = (unsigned long) childregs; - ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */ - ((long *)ksp)[0] = ((long *)ksp)[1] = 0; - ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long); - memcpy((void *)ksp, ®s->regs[CALLEE_SAVED_FIRST_REG], - CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long)); - ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */ - ((long *)ksp)[0] = ((long *)ksp)[1] = 0; - p->thread.ksp = ksp; #if CHIP_HAS_TILE_DMA() /* @@ -246,20 +187,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); #endif -#if CHIP_HAS_SN_PROC() - /* Likewise, the new thread is not running static processor code. 
*/ - p->thread.sn_proc_running = 0; - memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); -#endif - -#if CHIP_HAS_PROC_STATUS_SPR() /* New thread has its miscellaneous processor state bits clear. */ p->thread.proc_status = 0; -#endif #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ - p->thread.hardwall = NULL; + memset(&p->thread.hardwall[0], 0, + sizeof(struct hardwall_task) * HARDWALL_TYPES); #endif @@ -272,19 +206,32 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, return 0; } +int set_unalign_ctl(struct task_struct *tsk, unsigned int val) +{ + task_thread_info(tsk)->align_ctl = val; + return 0; +} + +int get_unalign_ctl(struct task_struct *tsk, unsigned long adr) +{ + return put_user(task_thread_info(tsk)->align_ctl, + (unsigned int __user *)adr); +} + +static struct task_struct corrupt_current = { .comm = "<corrupt>" }; + /* * Return "current" if it looks plausible, or else a pointer to a dummy. * This can be helpful if we are just trying to emit a clean panic. 
*/ struct task_struct *validate_current(void) { - static struct task_struct corrupt = { .comm = "<corrupt>" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || - (void *)tsk > high_memory || + (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); - tsk = &corrupt; + tsk = &corrupt_current; } return tsk; } @@ -423,15 +370,11 @@ static void save_arch_state(struct thread_struct *t) t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); -#if CHIP_HAS_PROC_STATUS_SPR() t->proc_status = __insn_mfspr(SPR_PROC_STATUS); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); #endif -#if CHIP_HAS_TILE_RTF_HWM() t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); -#endif #if CHIP_HAS_DSTREAM_PF() t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); #endif @@ -452,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t) __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); -#if CHIP_HAS_PROC_STATUS_SPR() __insn_mtspr(SPR_PROC_STATUS, t->proc_status); -#endif #if !CHIP_HAS_FIXED_INTVEC_BASE() __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); #endif -#if CHIP_HAS_TILE_RTF_HWM() __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); -#endif #if CHIP_HAS_DSTREAM_PF() __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); #endif @@ -469,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t) void _prepare_arch_switch(struct task_struct *next) { -#if CHIP_HAS_SN_PROC() - int snctl; -#endif #if CHIP_HAS_TILE_DMA() struct tile_dma_state *dma = ¤t->thread.tile_dma_state; if (dma->enabled) save_tile_dma_state(dma); #endif -#if CHIP_HAS_SN_PROC() - 
/* - * Suspend the static network processor if it was running. - * We do not suspend the fabric itself, just like we don't - * try to suspend the UDN. - */ - snctl = __insn_mfspr(SPR_SNCTL); - current->thread.sn_proc_running = - (snctl & SPR_SNCTL__FRZPROC_MASK) == 0; - if (current->thread.sn_proc_running) - __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK); -#endif } @@ -516,25 +440,9 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, /* Restore other arch state. */ restore_arch_state(&next->thread); -#if CHIP_HAS_SN_PROC() - /* - * Restart static network processor in the new process - * if it was running before. - */ - if (next->thread.sn_proc_running) { - int snctl = __insn_mfspr(SPR_SNCTL); - __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK); - } -#endif - #ifdef CONFIG_HARDWALL /* Enable or disable access to the network registers appropriately. */ - if (prev->thread.hardwall != NULL) { - if (next->thread.hardwall == NULL) - restrict_network_mpls(); - } else if (next->thread.hardwall != NULL) { - grant_network_mpls(); - } + hardwall_switch_tasks(prev, next); #endif /* @@ -546,61 +454,54 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, return __switch_to(prev, next, next_current_ksp0(next)); } -/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */ -SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, - void __user *, parent_tidptr, void __user *, child_tidptr, - struct pt_regs *, regs) -{ - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, - parent_tidptr, child_tidptr); -} - /* - * sys_execve() executes a new program. + * This routine is called on return from interrupt if any of the + * TIF_WORK_MASK flags are set in thread_info->flags. It is + * entered with interrupts disabled so we don't miss an event + * that modified the thread_info flags. 
If any flag is set, we + * handle it and return, and the calling assembly code will + * re-disable interrupts, reload the thread flags, and call back + * if more flags need to be handled. + * + * We return whether we need to check the thread_info flags again + * or not. Note that we don't clear TIF_SINGLESTEP here, so it's + * important that it be tested last, and then claim that we don't + * need to recheck the flags. */ -SYSCALL_DEFINE4(execve, const char __user *, path, - const char __user *const __user *, argv, - const char __user *const __user *, envp, - struct pt_regs *, regs) +int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) { - long error; - char *filename; - - filename = getname(path); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, argv, envp, regs); - putname(filename); - if (error == 0) - single_step_execve(); -out: - return error; -} + /* If we enter in kernel mode, do nothing and exit the caller loop. */ + if (!user_mode(regs)) + return 0; -#ifdef CONFIG_COMPAT -long compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, - struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(path); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = compat_do_execve(filename, argv, envp, regs); - putname(filename); - if (error == 0) - single_step_execve(); -out: - return error; -} + /* Enable interrupts; they are disabled again on return to caller. 
*/ + local_irq_enable(); + + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + return 1; + } +#if CHIP_HAS_TILE_DMA() + if (thread_info_flags & _TIF_ASYNC_TLB) { + do_async_page_fault(regs); + return 1; + } #endif + if (thread_info_flags & _TIF_SIGPENDING) { + do_signal(regs); + return 1; + } + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + return 1; + } + if (thread_info_flags & _TIF_SINGLESTEP) { + single_step_once(regs); + return 0; + } + panic("work_pending: bad flags %#x\n", thread_info_flags); +} unsigned long get_wchan(struct task_struct *p) { @@ -619,37 +520,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -/* - * We pass in lr as zero (cleared in kernel_thread) and the caller - * part of the backtrace ABI on the stack also zeroed (in copy_thread) - * so that backtraces will stop with this function. - * Note that we don't use r0, since copy_thread() clears it. - */ -static void start_kernel_thread(int dummy, int (*fn)(int), int arg) -{ - do_exit(fn(arg)); -} - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0); /* run at kernel PL, no ICS */ - regs.pc = (long) start_kernel_thread; - regs.flags = PT_FLAGS_CALLER_SAVES; /* need to restore r1 and r2 */ - regs.regs[1] = (long) fn; /* function pointer */ - regs.regs[2] = (long) arg; /* parameter register */ - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, - 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - /* Flush thread state. */ void flush_thread(void) { @@ -661,7 +531,15 @@ void flush_thread(void) */ void exit_thread(void) { - /* Nothing */ +#ifdef CONFIG_HARDWALL + /* + * Remove the task from the list of tasks that are associated + * with any live hardwalls. 
(If the task that is exiting held + * the last reference to a hardwall fd, it would already have + * been released and deactivated at this point.) + */ + hardwall_deactivate_all(current); +#endif } void show_regs(struct pt_regs *regs) @@ -670,24 +548,24 @@ void show_regs(struct pt_regs *regs) int i; pr_err("\n"); - pr_err(" Pid: %d, comm: %20s, CPU: %d\n", - tsk->pid, tsk->comm, smp_processor_id()); + if (tsk != &corrupt_current) + show_regs_print_info(KERN_ERR); #ifdef __tilegx__ - for (i = 0; i < 51; i += 3) + for (i = 0; i < 17; i++) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n", - i, regs->regs[i], i+1, regs->regs[i+1], - i+2, regs->regs[i+2]); - pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n", - regs->regs[51], regs->regs[52], regs->tp); + i, regs->regs[i], i+18, regs->regs[i+18], + i+36, regs->regs[i+36]); + pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n", + regs->regs[17], regs->regs[35], regs->tp); pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); #else - for (i = 0; i < 52; i += 4) + for (i = 0; i < 13; i++) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", - i, regs->regs[i], i+1, regs->regs[i+1], - i+2, regs->regs[i+2], i+3, regs->regs[i+3]); - pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", - regs->regs[52], regs->tp, regs->sp, regs->lr); + i, regs->regs[i], i+14, regs->regs[i+14], + i+27, regs->regs[i+27], i+40, regs->regs[i+40]); + pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n", + regs->regs[13], regs->tp, regs->sp, regs->lr); #endif pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n", regs->pc, regs->ex1, regs->faultnum); diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e92e40527d6..de98c6ddf13 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -19,7 +19,14 @@ #include <linux/kprobes.h> #include <linux/compat.h> #include <linux/uaccess.h> +#include <linux/regset.h> +#include <linux/elf.h> +#include 
<linux/tracehook.h> #include <asm/traps.h> +#include <arch/chip.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> void user_enable_single_step(struct task_struct *child) { @@ -45,6 +52,100 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } +/* + * Get registers from task and ready the result for userspace. + * Note that we localize the API issues to getregs() and putregs() at + * some cost in performance, e.g. we need a full pt_regs copy for + * PEEKUSR, and two copies for POKEUSR. But in general we expect + * GETREGS/PUTREGS to be the API of choice anyway. + */ +static char *getregs(struct task_struct *child, struct pt_regs *uregs) +{ + *uregs = *task_pt_regs(child); + + /* Set up flags ABI bits. */ + uregs->flags = 0; +#ifdef CONFIG_COMPAT + if (task_thread_info(child)->status & TS_COMPAT) + uregs->flags |= PT_FLAGS_COMPAT; +#endif + + return (char *)uregs; +} + +/* Put registers back to task. */ +static void putregs(struct task_struct *child, struct pt_regs *uregs) +{ + struct pt_regs *regs = task_pt_regs(child); + + /* Don't allow overwriting the kernel-internal flags word. */ + uregs->flags = regs->flags; + + /* Only allow setting the ICS bit in the ex1 word. 
*/ + uregs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(uregs->ex1)); + + *regs = *uregs; +} + +enum tile_regset { + REGSET_GPR, +}; + +static int tile_gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs regs; + + getregs(target, ®s); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, ®s, 0, + sizeof(regs)); +} + +static int tile_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct pt_regs regs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, + sizeof(regs)); + if (ret) + return ret; + + putregs(target, ®s); + + return 0; +} + +static const struct user_regset tile_user_regset[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .get = tile_gpr_get, + .set = tile_gpr_set, + }, +}; + +static const struct user_regset_view tile_user_regset_view = { + .name = CHIP_ARCH_NAME, + .e_machine = ELF_ARCH, + .ei_osabi = ELF_OSABI, + .regsets = tile_user_regset, + .n = ARRAY_SIZE(tile_user_regset), +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &tile_user_regset_view; +} + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -53,14 +154,13 @@ long arch_ptrace(struct task_struct *child, long request, long ret = -EIO; char *childreg; struct pt_regs copyregs; - int ex1_offset; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. 
*/ if (addr >= PTREGS_SIZE) break; - childreg = (char *)task_pt_regs(child) + addr; + childreg = getregs(child, ©regs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) @@ -79,17 +179,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_POKEUSR: /* Write register in pt_regs. */ if (addr >= PTREGS_SIZE) break; - childreg = (char *)task_pt_regs(child) + addr; - - /* Guard against overwrites of the privilege level. */ - ex1_offset = PTREGS_OFFSET_EX1; -#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) - if (is_compat_task()) /* point at low word */ - ex1_offset += sizeof(compat_long_t); -#endif - if (addr == ex1_offset) - data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); - + childreg = getregs(child, ©regs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) @@ -102,24 +192,20 @@ long arch_ptrace(struct task_struct *child, long request, break; *(long *)childreg = data; } + putregs(child, ©regs); ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ - if (copy_to_user(datap, task_pt_regs(child), - sizeof(struct pt_regs)) == 0) { - ret = 0; - } + ret = copy_regset_to_user(child, &tile_user_regset_view, + REGSET_GPR, 0, + sizeof(struct pt_regs), datap); break; case PTRACE_SETREGS: /* Set all registers in the child. */ - if (copy_from_user(©regs, datap, - sizeof(struct pt_regs)) == 0) { - copyregs.ex1 = - PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); - *task_pt_regs(child) = copyregs; - ret = 0; - } + ret = copy_regset_from_user(child, &tile_user_regset_view, + REGSET_GPR, 0, + sizeof(struct pt_regs), datap); break; case PTRACE_GETFPREGS: /* Get the child FPU state. */ @@ -128,12 +214,16 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_SETOPTIONS: /* Support TILE-specific ptrace options. 
*/ - child->ptrace &= ~PT_TRACE_MASK_TILE; + BUILD_BUG_ON(PTRACE_O_MASK_TILE & PTRACE_O_MASK); tmp = data & PTRACE_O_MASK_TILE; data &= ~PTRACE_O_MASK_TILE; ret = ptrace_request(child, request, addr, data); - if (tmp & PTRACE_O_TRACEMIGRATE) - child->ptrace |= PT_TRACE_MIGRATE; + if (ret == 0) { + unsigned int flags = child->ptrace; + flags &= ~(PTRACE_O_MASK_TILE << PT_OPT_FLAG_SHIFT); + flags |= (tmp << PT_OPT_FLAG_SHIFT); + child->ptrace = flags; + } break; default: @@ -160,32 +250,44 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } #endif -void do_syscall_trace(void) +int do_syscall_trace_enter(struct pt_regs *regs) { - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + if (test_thread_flag(TIF_SYSCALL_TRACE)) { + if (tracehook_report_syscall_entry(regs)) + regs->regs[TREG_SYSCALL_NR] = -1; + } - if (!(current->ptrace & PT_PTRACED)) - return; + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->regs[TREG_SYSCALL_NR]); - /* - * The 0x80 provides a way for the tracing parent to distinguish - * between a syscall stop and SIGTRAP delivery - */ - ptrace_notify(SIGTRAP|((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); + return regs->regs[TREG_SYSCALL_NR]; +} + +void do_syscall_trace_exit(struct pt_regs *regs) +{ + long errno; /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl + * The standard tile calling convention returns the value (or negative + * errno) in r0, and zero (or positive errno) in r1. + * It saves a couple of cycles on the hot path to do this work in + * registers only as we return, rather than updating the in-memory + * struct ptregs. 
*/ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + errno = (long) regs->regs[0]; + if (errno < 0 && errno > -4096) + regs->regs[1] = -errno; + else + regs->regs[1] = 0; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs->regs[0]); } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs) { struct siginfo info; @@ -201,5 +303,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) /* Handle synthetic interrupt delivered only by the simulator. */ void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) { - send_sigtrap(current, regs, fault_num); + send_sigtrap(current, regs); } diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index baa3d905fee..6c5d2c070a1 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -16,6 +16,7 @@ #include <linux/reboot.h> #include <linux/smp.h> #include <linux/pm.h> +#include <linux/export.h> #include <asm/page.h> #include <asm/setup.h> #include <hv/hypervisor.h> @@ -26,7 +27,6 @@ void machine_halt(void) { - warn_early_printk(); arch_local_irq_disable_all(); smp_send_stop(); hv_halt(); @@ -34,7 +34,6 @@ void machine_halt(void) void machine_power_off(void) { - warn_early_printk(); arch_local_irq_disable_all(); smp_send_stop(); hv_power_off(); @@ -49,3 +48,4 @@ void machine_restart(char *cmd) /* No interesting distinction to be made here. 
*/ void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S index caa13101c26..542cae17a93 100644 --- a/arch/tile/kernel/regs_32.S +++ b/arch/tile/kernel/regs_32.S @@ -13,14 +13,14 @@ */ #include <linux/linkage.h> -#include <asm/system.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> #include <arch/spr_def.h> #include <asm/processor.h> +#include <asm/switch_to.h> /* - * See <asm/system.h>; called with prev and next task_struct pointers. + * See <asm/switch_to.h>; called with prev and next task_struct pointers. * "prev" is returned in r0 for _switch_to and also for ret_from_fork. * * We want to save pc/sp in "prev", and get the new pc/sp from "next". @@ -39,7 +39,7 @@ */ #if CALLEE_SAVED_REGS_COUNT != 24 -# error Mismatch between <asm/system.h> and kernel/entry.S +# error Mismatch between <asm/switch_to.h> and kernel/entry.S #endif #define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4) diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S new file mode 100644 index 00000000000..bbffcc6f340 --- /dev/null +++ b/arch/tile/kernel/regs_64.S @@ -0,0 +1,145 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <arch/spr_def.h> +#include <asm/processor.h> +#include <asm/switch_to.h> + +/* + * See <asm/switch_to.h>; called with prev and next task_struct pointers. 
+ * "prev" is returned in r0 for _switch_to and also for ret_from_fork. + * + * We want to save pc/sp in "prev", and get the new pc/sp from "next". + * We also need to save all the callee-saved registers on the stack. + * + * Intel enables/disables access to the hardware cycle counter in + * seccomp (secure computing) environments if necessary, based on + * has_secure_computing(). We might want to do this at some point, + * though it would require virtualizing the other SPRs under WORLD_ACCESS. + * + * Since we're saving to the stack, we omit sp from this list. + * And for parallels with other architectures, we save lr separately, + * in the thread_struct itself (as the "pc" field). + * + * This code also needs to be aligned with process.c copy_thread() + */ + +#if CALLEE_SAVED_REGS_COUNT != 24 +# error Mismatch between <asm/switch_to.h> and kernel/entry.S +#endif +#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8) + +#define SAVE_REG(r) { st r12, r; addi r12, r12, 8 } +#define LOAD_REG(r) { ld r, r12; addi r12, r12, 8 } +#define FOR_EACH_CALLEE_SAVED_REG(f) \ + f(r30); f(r31); \ + f(r32); f(r33); f(r34); f(r35); f(r36); f(r37); f(r38); f(r39); \ + f(r40); f(r41); f(r42); f(r43); f(r44); f(r45); f(r46); f(r47); \ + f(r48); f(r49); f(r50); f(r51); f(r52); + +STD_ENTRY_SECTION(__switch_to, .sched.text) + { + move r10, sp + st sp, lr + } + { + addli r11, sp, -FRAME_SIZE + 8 + addli sp, sp, -FRAME_SIZE + } + { + st r11, r10 + addli r4, r1, TASK_STRUCT_THREAD_KSP_OFFSET + } + { + ld r13, r4 /* Load new sp to a temp register early. */ + addi r12, sp, 16 + } + FOR_EACH_CALLEE_SAVED_REG(SAVE_REG) + addli r3, r0, TASK_STRUCT_THREAD_KSP_OFFSET + { + st r3, sp + addli r3, r0, TASK_STRUCT_THREAD_PC_OFFSET + } + { + st r3, lr + addli r4, r1, TASK_STRUCT_THREAD_PC_OFFSET + } + { + ld lr, r4 + addi r12, r13, 16 + } + { + /* Update sp and ksp0 simultaneously to avoid backtracer warnings. 
*/ + move sp, r13 + mtspr SPR_SYSTEM_SAVE_K_0, r2 + } + FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) +.L__switch_to_pc: + { + addli sp, sp, FRAME_SIZE + jrp lr /* r0 is still valid here, so return it */ + } + STD_ENDPROC(__switch_to) + +/* Return a suitable address for the backtracer for suspended threads */ +STD_ENTRY_SECTION(get_switch_to_pc, .sched.text) + lnk r0 + { + addli r0, r0, .L__switch_to_pc - . + jrp lr + } + STD_ENDPROC(get_switch_to_pc) + +STD_ENTRY(get_pt_regs) + .irp reg, r0, r1, r2, r3, r4, r5, r6, r7, \ + r8, r9, r10, r11, r12, r13, r14, r15, \ + r16, r17, r18, r19, r20, r21, r22, r23, \ + r24, r25, r26, r27, r28, r29, r30, r31, \ + r32, r33, r34, r35, r36, r37, r38, r39, \ + r40, r41, r42, r43, r44, r45, r46, r47, \ + r48, r49, r50, r51, r52, tp, sp + { + st r0, \reg + addi r0, r0, 8 + } + .endr + { + st r0, lr + addi r0, r0, PTREGS_OFFSET_PC - PTREGS_OFFSET_LR + } + lnk r1 + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC + } + mfspr r1, INTERRUPT_CRITICAL_SECTION + shli r1, r1, SPR_EX_CONTEXT_1_1__ICS_SHIFT + ori r1, r1, KERNEL_PL + { + st r0, r1 + addi r0, r0, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 + } + { + st r0, zero /* clear faultnum */ + addi r0, r0, PTREGS_OFFSET_ORIG_R0 - PTREGS_OFFSET_FAULTNUM + } + { + st r0, zero /* clear orig_r0 */ + addli r0, r0, -PTREGS_OFFSET_ORIG_R0 /* restore r0 to base */ + } + jrp lr + STD_ENDPROC(get_pt_regs) diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel_32.S index 010b418515f..e44fbcf8cbd 100644 --- a/arch/tile/kernel/relocate_kernel.S +++ b/arch/tile/kernel/relocate_kernel_32.S @@ -20,15 +20,6 @@ #include <asm/page.h> #include <hv/hypervisor.h> -#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA - -#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f)) - -#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC) -#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT) -#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC) -#define 
___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE) - #undef RELOCATE_NEW_KERNEL_VERBOSE STD_ENTRY(relocate_new_kernel) @@ -43,8 +34,8 @@ STD_ENTRY(relocate_new_kernel) addi sp, sp, -8 /* we now have a stack (whether we need one or not) */ - moveli r40, lo16(___hv_console_putc) - auli r40, r40, ha16(___hv_console_putc) + moveli r40, lo16(hv_console_putc) + auli r40, r40, ha16(hv_console_putc) #ifdef RELOCATE_NEW_KERNEL_VERBOSE moveli r0, 'r' @@ -86,7 +77,6 @@ STD_ENTRY(relocate_new_kernel) move r30, sp addi sp, sp, -8 -#if CHIP_HAS_CBOX_HOME_MAP() /* * On TILEPro, we need to flush all tiles' caches, since we may * have been doing hash-for-home caching there. Note that we @@ -114,15 +104,14 @@ STD_ENTRY(relocate_new_kernel) } { move r8, zero /* asids */ - moveli r20, lo16(___hv_flush_remote) + moveli r20, lo16(hv_flush_remote) } { move r9, zero /* asidcount */ - auli r20, r20, ha16(___hv_flush_remote) + auli r20, r20, ha16(hv_flush_remote) } jalr r20 -#endif /* r33 is destination pointer, default to zero */ @@ -175,8 +164,8 @@ STD_ENTRY(relocate_new_kernel) move r0, r32 moveli r1, 0 /* arg to hv_reexec is 64 bits */ - moveli r41, lo16(___hv_reexec) - auli r41, r41, ha16(___hv_reexec) + moveli r41, lo16(hv_reexec) + auli r41, r41, ha16(hv_reexec) jalr r41 @@ -267,8 +256,8 @@ STD_ENTRY(relocate_new_kernel) moveli r0, '\n' jalr r40 .Lhalt: - moveli r41, lo16(___hv_halt) - auli r41, r41, ha16(___hv_halt) + moveli r41, lo16(hv_halt) + auli r41, r41, ha16(hv_halt) jalr r41 STD_ENDPROC(relocate_new_kernel) diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S new file mode 100644 index 00000000000..d9d8cf6176e --- /dev/null +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -0,0 +1,263 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * copy new kernel into place and then call hv_reexec + * + */ + +#include <linux/linkage.h> +#include <arch/chip.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +#undef RELOCATE_NEW_KERNEL_VERBOSE + +STD_ENTRY(relocate_new_kernel) + + move r30, r0 /* page list */ + move r31, r1 /* address of page we are on */ + move r32, r2 /* start address of new kernel */ + + shrui r1, r1, PAGE_SHIFT + addi r1, r1, 1 + shli sp, r1, PAGE_SHIFT + addi sp, sp, -8 + /* we now have a stack (whether we need one or not) */ + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r40, hw2_last(hv_console_putc) + shl16insli r40, r40, hw1(hv_console_putc) + shl16insli r40, r40, hw0(hv_console_putc) + + moveli r0, 'r' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'n' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'k' + jalr r40 + + moveli r0, '\n' + jalr r40 +#endif + + /* + * Throughout this code r30 is pointer to the element of page + * list we are working on. + * + * Normally we get to the next element of the page list by + * incrementing r30 by eight. The exception is if the element + * on the page list is an IND_INDIRECTION in which case we use + * the element with the low bits masked off as the new value + * of r30. + * + * To get this started, we need the value passed to us (which + * will always be an IND_INDIRECTION) in memory somewhere with + * r30 pointing at it. To do that, we push the value passed + * to us on the stack and make r30 point to it. 
+ */ + + st sp, r30 + move r30, sp + addi sp, sp, -16 + + /* + * On TILE-GX, we need to flush all tiles' caches, since we may + * have been doing hash-for-home caching there. Note that we + * must do this _after_ we're completely done modifying any memory + * other than our output buffer (which we know is locally cached). + * We want the caches to be fully clean when we do the reexec, + * because the hypervisor is going to do this flush again at that + * point, and we don't want that second flush to overwrite any memory. + */ + { + move r0, zero /* cache_pa */ + moveli r1, hw2_last(HV_FLUSH_EVICT_L2) + } + { + shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) + movei r2, -1 /* cache_cpumask; -1 means all client tiles */ + } + { + shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) /* cache_control */ + move r3, zero /* tlb_va */ + } + { + move r4, zero /* tlb_length */ + move r5, zero /* tlb_pgsize */ + } + { + move r6, zero /* tlb_cpumask */ + move r7, zero /* asids */ + } + { + moveli r20, hw2_last(hv_flush_remote) + move r8, zero /* asidcount */ + } + shl16insli r20, r20, hw1(hv_flush_remote) + shl16insli r20, r20, hw0(hv_flush_remote) + + jalr r20 + + /* r33 is destination pointer, default to zero */ + + moveli r33, 0 + +.Lloop: ld r10, r30 + + andi r9, r10, 0xf /* low 4 bits tell us what type it is */ + xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */ + + cmpeqi r0, r9, 0x1 /* IND_DESTINATION */ + beqzt r0, .Ltry2 + + move r33, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'd' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + +.Ltry2: + cmpeqi r0, r9, 0x2 /* IND_INDIRECTION */ + beqzt r0, .Ltry4 + + move r30, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'i' + jalr r40 +#endif + + j .Lloop + +.Ltry4: + cmpeqi r0, r9, 0x4 /* IND_DONE */ + beqzt r0, .Ltry8 + + mf + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'D' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + move r0, r32 + + moveli r41, hw2_last(hv_reexec) + shl16insli r41, r41, 
hw1(hv_reexec) + shl16insli r41, r41, hw0(hv_reexec) + + jalr r41 + + /* we should not get here */ + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, '?' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + j .Lhalt + +.Ltry8: cmpeqi r0, r9, 0x8 /* IND_SOURCE */ + beqz r0, .Lerr /* unknown type */ + + /* copy page at r10 to page at r33 */ + + move r11, r33 + + moveli r0, hw2_last(PAGE_SIZE) + shl16insli r0, r0, hw1(PAGE_SIZE) + shl16insli r0, r0, hw0(PAGE_SIZE) + add r33, r33, r0 + + /* copy word at r10 to word at r11 until r11 equals r33 */ + + /* We know page size must be multiple of 8, so we can unroll + * 8 times safely without any edge case checking. + * + * Issue a flush of the destination every 8 words to avoid + * incoherence when starting the new kernel. (Now this is + * just good paranoia because the hv_reexec call will also + * take care of this.) + */ + +1: + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0 } + { flush r11 ; addi r11, r11, 8 } + + cmpeq r0, r33, r11 + beqzt r0, 1b + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 's' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + + +.Lerr: +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'e' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif +.Lhalt: + moveli r41, hw2_last(hv_halt) + shl16insli r41, r41, hw1(hv_halt) + shl16insli r41, r41, hw0(hv_halt) + + jalr r41 + STD_ENDPROC(relocate_new_kernel) + + .section .rodata,"a" + + .globl relocate_new_kernel_size 
+relocate_new_kernel_size: + .long .Lend_relocate_new_kernel - relocate_new_kernel diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 3696b183256..112ababa9e5 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -23,11 +23,15 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/pci.h> +#include <linux/swiotlb.h> #include <linux/initrd.h> #include <linux/io.h> #include <linux/highmem.h> #include <linux/smp.h> #include <linux/timex.h> +#include <linux/hugetlb.h> +#include <linux/start_kernel.h> +#include <linux/screen_info.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> @@ -46,24 +50,41 @@ static inline int ABS(int x) { return x >= 0 ? x : -x; } /* Chip information */ char chip_model[64] __write_once; +#ifdef CONFIG_VT +struct screen_info screen_info; +#endif + struct pglist_data node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -/* We only create bootmem data on node 0. */ -static bootmem_data_t __initdata node0_bdata; - /* Information on the NUMA nodes that we compute early */ -unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; -unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; +unsigned long node_start_pfn[MAX_NUMNODES]; +unsigned long node_end_pfn[MAX_NUMNODES]; unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; unsigned long __initdata node_free_pfn[MAX_NUMNODES]; static unsigned long __initdata node_percpu[MAX_NUMNODES]; +/* + * per-CPU stack and boot info. + */ +DEFINE_PER_CPU(unsigned long, boot_sp) = + (unsigned long)init_stack + THREAD_SIZE; + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; +#else +/* + * The variable must be __initdata since it references __init code. + * With CONFIG_SMP it is per-cpu data, which is exempt from validation. 
+ */ +unsigned long __initdata boot_pc = (unsigned long)start_kernel; +#endif + #ifdef CONFIG_HIGHMEM /* Page frame index of end of lowmem on each controller. */ -unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; +unsigned long node_lowmem_end_pfn[MAX_NUMNODES]; /* Number of pages that can be mapped into lowmem. */ static unsigned long __initdata mappable_physpages; @@ -94,7 +115,7 @@ static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = { }; static nodemask_t __initdata isolnodes; -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) enum { DEFAULT_PCI_RESERVE_MB = 64 }; static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB; unsigned long __initdata pci_reserve_start_pfn = -1U; @@ -103,13 +124,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U; static int __init setup_maxmem(char *str) { - long maxmem_mb; - if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 || - maxmem_mb == 0) + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) return -EINVAL; - maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) << - (HPAGE_SHIFT - PAGE_SHIFT); + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; @@ -119,14 +138,15 @@ early_param("maxmem", setup_maxmem); static int __init setup_maxnodemem(char *str) { char *endp; - long maxnodemem_mb, node; + unsigned long long maxnodemem; + long node; node = str ? 
simple_strtoul(str, &endp, 0) : INT_MAX; - if (node >= MAX_NUMNODES || *endp != ':' || - strict_strtol(endp+1, 0, &maxnodemem_mb) != 0) + if (node >= MAX_NUMNODES || *endp != ':') return -EINVAL; - maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) << + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); @@ -134,6 +154,65 @@ static int __init setup_maxnodemem(char *str) } early_param("maxnodemem", setup_maxnodemem); +struct memmap_entry { + u64 addr; /* start of memory segment */ + u64 size; /* size of memory segment */ +}; +static struct memmap_entry memmap_map[64]; +static int memmap_nr; + +static void add_memmap_region(u64 addr, u64 size) +{ + if (memmap_nr >= ARRAY_SIZE(memmap_map)) { + pr_err("Ooops! Too many entries in the memory map!\n"); + return; + } + memmap_map[memmap_nr].addr = addr; + memmap_map[memmap_nr].size = size; + memmap_nr++; +} + +static int __init setup_memmap(char *p) +{ + char *oldp; + u64 start_at, mem_size; + + if (!p) + return -EINVAL; + + if (!strncmp(p, "exactmap", 8)) { + pr_err("\"memmap=exactmap\" not valid on tile\n"); + return 0; + } + + oldp = p; + mem_size = memparse(p, &p); + if (p == oldp) + return -EINVAL; + + if (*p == '@') { + pr_err("\"memmap=nn@ss\" (force RAM) invalid on tile\n"); + } else if (*p == '#') { + pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on tile\n"); + } else if (*p == '$') { + start_at = memparse(p+1, &p); + add_memmap_region(start_at, mem_size); + } else { + if (mem_size == 0) + return -EINVAL; + maxmem_pfn = (mem_size >> HPAGE_SHIFT) << + (HPAGE_SHIFT - PAGE_SHIFT); + } + return *p == '\0' ? 
0 : -EINVAL; +} +early_param("memmap", setup_memmap); + +static int __init setup_mem(char *str) +{ + return setup_maxmem(str); +} +early_param("mem", setup_mem); /* compatibility with x86 */ + static int __init setup_isolnodes(char *str) { char buf[MAX_NUMNODES * 5]; @@ -146,18 +225,15 @@ static int __init setup_isolnodes(char *str) } early_param("isolnodes", setup_isolnodes); -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) static int __init setup_pci_reserve(char* str) { - unsigned long mb; - - if (str == NULL || strict_strtoul(str, 0, &mb) != 0 || - mb > 3 * 1024) + if (str == NULL || kstrtouint(str, 0, &pci_reserve_mb) != 0 || + pci_reserve_mb > 3 * 1024) return -EINVAL; - pci_reserve_mb = mb; pr_info("Reserving %dMB for PCIE root complex mappings\n", - pci_reserve_mb); + pci_reserve_mb); return 0; } early_param("pci_reserve", setup_pci_reserve); @@ -189,7 +265,7 @@ early_param("vmalloc", parse_vmalloc); /* * Determine for each controller where its lowmem is mapped and how much of * it is mapped there. On controller zero, the first few megabytes are - * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * already mapped in as code at MEM_SV_START, so in principle we could * start our data mappings higher up, but for now we don't bother, to avoid * additional confusion. * @@ -270,7 +346,7 @@ static void *__init setup_pa_va_mapping(void) * This is up to 4 mappings for lowmem, one mapping per memory * controller, plus one for our text segment. 
*/ -static void __cpuinit store_permanent_mappings(void) +static void store_permanent_mappings(void) { int i; @@ -287,8 +363,8 @@ static void __cpuinit store_permanent_mappings(void) hv_store_mapping(addr, pages << PAGE_SHIFT, pa); } - hv_store_mapping((HV_VirtAddr)_stext, - (uint32_t)(_einittext - _stext), 0); + hv_store_mapping((HV_VirtAddr)_text, + (uint32_t)(_einittext - _text), 0); } /* @@ -309,6 +385,7 @@ static void __init setup_memory(void) #if defined(CONFIG_HIGHMEM) || defined(__tilegx__) long lowmem_pages; #endif + unsigned long physpages = 0; /* We are using a char to hold the cpu_2_node[] mapping */ BUILD_BUG_ON(MAX_NUMNODES > 127); @@ -368,8 +445,8 @@ static void __init setup_memory(void) continue; } } - if (num_physpages + PFN_DOWN(range.size) > maxmem_pfn) { - int max_size = maxmem_pfn - num_physpages; + if (physpages + PFN_DOWN(range.size) > maxmem_pfn) { + int max_size = maxmem_pfn - physpages; if (max_size > 0) { pr_err("Maxmem reduced node %d to %d pages\n", i, max_size); @@ -397,7 +474,7 @@ static void __init setup_memory(void) continue; } #endif -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) /* * Blocks that overlap the pci reserved region must * have enough space to hold the maximum percpu data @@ -426,7 +503,7 @@ static void __init setup_memory(void) node_start_pfn[i] = start; node_end_pfn[i] = end; node_controller[i] = range.controller; - num_physpages += size; + physpages += size; max_pfn = end; /* Mark node as online */ @@ -445,7 +522,7 @@ static void __init setup_memory(void) * we're willing to use at 8 million pages (32GB of 4KB pages). 
*/ cap = 8 * 1024 * 1024; /* 8 million pages */ - if (num_physpages > cap) { + if (physpages > cap) { int num_nodes = num_online_nodes(); int cap_each = cap / num_nodes; unsigned long dropped_pages = 0; @@ -456,10 +533,10 @@ static void __init setup_memory(void) node_end_pfn[i] = node_start_pfn[i] + cap_each; } } - num_physpages -= dropped_pages; + physpages -= dropped_pages; pr_warning("Only using %ldMB memory;" " ignoring %ldMB.\n", - num_physpages >> (20 - PAGE_SHIFT), + physpages >> (20 - PAGE_SHIFT), dropped_pages >> (20 - PAGE_SHIFT)); pr_warning("Consider using a larger page size.\n"); } @@ -477,7 +554,7 @@ static void __init setup_memory(void) lowmem_pages = (mappable_physpages > MAXMEM_PFN) ? MAXMEM_PFN : mappable_physpages; - highmem_pages = (long) (num_physpages - lowmem_pages); + highmem_pages = (long) (physpages - lowmem_pages); pr_notice("%ldMB HIGHMEM available.\n", pages_to_mb(highmem_pages > 0 ? highmem_pages : 0)); @@ -494,7 +571,6 @@ static void __init setup_memory(void) pr_warning("Use a HIGHMEM enabled kernel.\n"); max_low_pfn = MAXMEM_PFN; max_pfn = MAXMEM_PFN; - num_physpages = MAXMEM_PFN; node_end_pfn[0] = MAXMEM_PFN; } else { pr_notice("%ldMB memory available.\n", @@ -519,42 +595,125 @@ static void __init setup_memory(void) #endif } -static void __init setup_bootmem_allocator(void) +/* + * On 32-bit machines, we only put bootmem on the low controller, + * since PAs > 4GB can't be used in bootmem. In principle one could + * imagine, e.g., multiple 1 GB controllers all of which could support + * bootmem, but in practice using controllers this small isn't a + * particularly interesting scenario, so we just keep it simple and + * use only the first controller for bootmem on 32-bit machines. 
+ */ +static inline int node_has_bootmem(int nid) +{ +#ifdef CONFIG_64BIT + return 1; +#else + return nid == 0; +#endif +} + +static inline unsigned long alloc_bootmem_pfn(int nid, + unsigned long size, + unsigned long goal) +{ + void *kva = __alloc_bootmem_node(NODE_DATA(nid), size, + PAGE_SIZE, goal); + unsigned long pfn = kaddr_to_pfn(kva); + BUG_ON(goal && PFN_PHYS(pfn) != goal); + return pfn; +} + +static void __init setup_bootmem_allocator_node(int i) { - unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; + unsigned long start, end, mapsize, mapstart; + + if (node_has_bootmem(i)) { + NODE_DATA(i)->bdata = &bootmem_node_data[i]; + } else { + /* Share controller zero's bdata for now. */ + NODE_DATA(i)->bdata = &bootmem_node_data[0]; + return; + } - /* Provide a node 0 bdata. */ - NODE_DATA(0)->bdata = &node0_bdata; + /* Skip up to after the bss in node 0. */ + start = (i == 0) ? min_low_pfn : node_start_pfn[i]; -#ifdef CONFIG_PCI - /* Don't let boot memory alias the PCI region. */ - last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); + /* Only lowmem, if we're a HIGHMEM build. */ +#ifdef CONFIG_HIGHMEM + end = node_lowmem_end_pfn[i]; #else - last_alloc_pfn = max_low_pfn; + end = node_end_pfn[i]; #endif - /* - * Initialize the boot-time allocator (with low memory only): - * The first argument says where to put the bitmap, and the - * second says where the end of allocatable memory is. - */ - bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); + /* No memory here. */ + if (end == start) + return; + + /* Figure out where the bootmem bitmap is located. */ + mapsize = bootmem_bootmap_pages(end - start); + if (i == 0) { + /* Use some space right before the heap on node 0. */ + mapstart = start; + start += mapsize; + } else { + /* Allocate bitmap on node 0 to avoid page table issues. */ + mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0); + } + /* Initialize a node. 
*/ + init_bootmem_node(NODE_DATA(i), mapstart, start, end); + + /* Free all the space back into the allocator. */ + free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); + +#if defined(CONFIG_PCI) && !defined(__tilegx__) /* - * Let the bootmem allocator use all the space we've given it - * except for its own bitmap. + * Throw away any memory aliased by the PCI region. */ - first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); - if (first_alloc_pfn >= last_alloc_pfn) - early_panic("Not enough memory on controller 0 for bootmem\n"); + if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) { + start = max(pci_reserve_start_pfn, start); + end = min(pci_reserve_end_pfn, end); + reserve_bootmem(PFN_PHYS(start), PFN_PHYS(end - start), + BOOTMEM_EXCLUSIVE); + } +#endif +} + +static void __init setup_bootmem_allocator(void) +{ + int i; + for (i = 0; i < MAX_NUMNODES; ++i) + setup_bootmem_allocator_node(i); + + /* Reserve any memory excluded by "memmap" arguments. */ + for (i = 0; i < memmap_nr; ++i) { + struct memmap_entry *m = &memmap_map[i]; + reserve_bootmem(m->addr, m->size, BOOTMEM_DEFAULT); + } - free_bootmem(PFN_PHYS(first_alloc_pfn), - PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) { + /* Make sure the initrd memory region is not modified. */ + if (reserve_bootmem(initrd_start, initrd_end - initrd_start, + BOOTMEM_EXCLUSIVE)) { + pr_crit("The initrd memory region has been polluted. Disabling it.\n"); + initrd_start = 0; + initrd_end = 0; + } else { + /* + * Translate initrd_start & initrd_end from PA to VA for + * future access. 
+ */ + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; + } + } +#endif #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1, 0); + reserve_bootmem(crashk_res.start, resource_size(&crashk_res), + BOOTMEM_DEFAULT); #endif } @@ -581,19 +740,13 @@ static int __init percpu_size(void) return size; } -static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) -{ - void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); - unsigned long pfn = kaddr_to_pfn(kva); - BUG_ON(goal && PFN_PHYS(pfn) != goal); - return pfn; -} - static void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = { 0 }; int size = percpu_size(); int num_cpus = smp_height * smp_width; + const unsigned long dma_end = (1UL << (32 - PAGE_SHIFT)); + int i; for (i = 0; i < num_cpus; ++i) @@ -626,21 +779,22 @@ static void __init zone_sizes_init(void) * though, there'll be no lowmem, so we just alloc_bootmem * the memmap. There will be no percpu memory either. */ - if (__pfn_to_highbits(start) == 0) { - /* In low PAs, allocate via bootmem. */ + if (i != 0 && cpu_isset(i, isolnodes)) { + node_memmap_pfn[i] = + alloc_bootmem_pfn(0, memmap_size, 0); + BUG_ON(node_percpu[i] != 0); + } else if (node_has_bootmem(start)) { unsigned long goal = 0; node_memmap_pfn[i] = - alloc_bootmem_pfn(memmap_size, goal); + alloc_bootmem_pfn(i, memmap_size, 0); if (kdata_huge) goal = PFN_PHYS(lowmem_end) - node_percpu[i]; if (node_percpu[i]) node_percpu_pfn[i] = - alloc_bootmem_pfn(node_percpu[i], goal); - } else if (cpu_isset(i, isolnodes)) { - node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); - BUG_ON(node_percpu[i] != 0); + alloc_bootmem_pfn(i, node_percpu[i], + goal); } else { - /* In high PAs, just reserve some pages. */ + /* In non-bootmem zones, just reserve some pages. 
*/ node_memmap_pfn[i] = node_free_pfn[i]; node_free_pfn[i] += PFN_UP(memmap_size); if (!kdata_huge) { @@ -664,23 +818,24 @@ static void __init zone_sizes_init(void) zones_size[ZONE_NORMAL] = end - start; #endif - /* - * Everyone shares node 0's bootmem allocator, but - * we use alloc_remap(), above, to put the actual - * struct page array on the individual controllers, - * which is most of the data that we actually care about. - * We can't place bootmem allocators on the other - * controllers since the bootmem allocator can only - * operate on 32-bit physical addresses. - */ - NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; + if (start < dma_end) { + zones_size[ZONE_DMA] = min(zones_size[ZONE_NORMAL], + dma_end - start); + zones_size[ZONE_NORMAL] -= zones_size[ZONE_DMA]; + } else { + zones_size[ZONE_DMA] = 0; + } + + /* Take zone metadata from controller 0 if we're isolnode. */ + if (node_isset(i, isolnodes)) + NODE_DATA(i)->bdata = &bootmem_node_data[0]; free_area_init_node(i, zones_size, start, NULL); printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", PFN_UP(node_percpu[i])); /* Track the type of memory on each node */ - if (zones_size[ZONE_NORMAL]) + if (zones_size[ZONE_NORMAL] || zones_size[ZONE_DMA]) node_set_state(i, N_NORMAL_MEMORY); #ifdef CONFIG_HIGHMEM if (end != start) @@ -856,13 +1011,29 @@ subsys_initcall(topology_init); #endif /* CONFIG_NUMA */ +/* + * Initialize hugepage support on this cpu. We do this on all cores + * early in boot: before argument parsing for the boot cpu, and after + * argument parsing but before the init functions run on the secondaries. + * So the values we set up here in the hypervisor may be overridden on + * the boot cpu as arguments are parsed. + */ +static void init_super_pages(void) +{ +#ifdef CONFIG_HUGETLB_SUPER_PAGES + int i; + for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i) + hv_set_pte_super_shift(i, huge_shift[i]); +#endif +} + /** * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. 
* @boot: Is this the boot cpu? * * Called from setup_arch() on the boot cpu, or online_secondary(). */ -void __cpuinit setup_cpu(int boot) +void setup_cpu(int boot) { /* The boot cpu sets up its permanent mappings much earlier. */ if (!boot) @@ -873,9 +1044,6 @@ void __cpuinit setup_cpu(int boot) arch_local_irq_unmask(INT_DMATLB_MISS); arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif -#if CHIP_HAS_SN_PROC() - arch_local_irq_unmask(INT_SNITLB_MISS); -#endif #ifdef __tilegx__ arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif @@ -890,10 +1058,6 @@ void __cpuinit setup_cpu(int boot) /* Static network is not restricted. */ __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); #endif -#if CHIP_HAS_SN_PROC() - __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1); - __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1); -#endif /* * Set the MPL for interrupt control 0 & 1 to the corresponding @@ -910,10 +1074,14 @@ void __cpuinit setup_cpu(int boot) /* Reset the network state on this cpu. */ reset_network_state(); #endif + + init_super_pages(); } +#ifdef CONFIG_BLK_DEV_INITRD + static int __initdata set_initramfs_file; -static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; +static char __initdata initramfs_file[128] = "initramfs"; static int __init setup_initramfs_file(char *str) { @@ -927,9 +1095,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an additional "initramfs.cpio.gz" file in the hvfs. - * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs after any built-in initramfs_data. + * We look for a file called "initramfs" in the hvfs. If there is one, we + * allocate some memory for it and it will be unpacked to the initramfs. + * If it's compressed, the initrd code will uncompress it first. 
*/ static void __init load_hv_initrd(void) { @@ -937,12 +1105,22 @@ static void __init load_hv_initrd(void) int fd, rc; void *initrd; + /* If initrd has already been set, skip initramfs file in hvfs. */ + if (initrd_start) + return; + fd = hv_fs_findfile((HV_VirtAddr) initramfs_file); if (fd == HV_ENOENT) { - if (set_initramfs_file) + if (set_initramfs_file) { pr_warning("No such hvfs initramfs file '%s'\n", initramfs_file); - return; + return; + } else { + /* Try old backwards-compatible name. */ + fd = hv_fs_findfile((HV_VirtAddr)"initramfs.cpio.gz"); + if (fd == HV_ENOENT) + return; + } } BUG_ON(fd < 0); stat = hv_fs_fstat(fd); @@ -969,6 +1147,29 @@ void __init free_initrd_mem(unsigned long begin, unsigned long end) free_bootmem(__pa(begin), end - begin); } +static int __init setup_initrd(char *str) +{ + char *endp; + unsigned long initrd_size; + + initrd_size = str ? simple_strtoul(str, &endp, 0) : 0; + if (initrd_size == 0 || *endp != '@') + return -EINVAL; + + initrd_start = simple_strtoul(endp+1, &endp, 0); + if (initrd_start == 0) + return -EINVAL; + + initrd_end = initrd_start + initrd_size; + + return 0; +} +early_param("initrd", setup_initrd); + +#else +static inline void load_hv_initrd(void) {} +#endif /* CONFIG_BLK_DEV_INITRD */ + static void __init validate_hv(void) { /* @@ -1032,7 +1233,7 @@ static void __init validate_va(void) #ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */ /* * Similarly, make sure we're only using allowed VAs. - * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT, + * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_START, * and 0 .. KERNEL_HIGH_VADDR. * In addition, make sure we CAN'T use the end of memory, since * we use the last chunk of each pgd for the pgd_list. 
@@ -1047,7 +1248,7 @@ static void __init validate_va(void) if (range.size == 0) break; if (range.start <= MEM_USER_INTRPT && - range.start + range.size >= MEM_HV_INTRPT) + range.start + range.size >= MEM_HV_START) user_kernel_ok = 1; if (range.start == 0) max_va = range.size; @@ -1065,8 +1266,7 @@ static void __init validate_va(void) if ((long)VMALLOC_START >= 0) early_panic( "Linux VMALLOC region below the 2GB line (%#lx)!\n" - "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n" - "or smaller VMALLOC_RESERVE.\n", + "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n", VMALLOC_START); #endif } @@ -1081,7 +1281,6 @@ static void __init validate_va(void) struct cpumask __write_once cpu_lotar_map; EXPORT_SYMBOL(cpu_lotar_map); -#if CHIP_HAS_CBOX_HOME_MAP() /* * hash_for_home_map lists all the tiles that hash-for-home data * will be cached on. Note that this may includes tiles that are not @@ -1091,11 +1290,10 @@ EXPORT_SYMBOL(cpu_lotar_map); */ struct cpumask hash_for_home_map; EXPORT_SYMBOL(hash_for_home_map); -#endif /* * cpu_cacheable_map lists all the cpus whose caches the hypervisor can - * flush on our behalf. It is set to cpu_possible_map OR'ed with + * flush on our behalf. It is set to cpu_possible_mask OR'ed with * hash_for_home_map, and it is what should be passed to * hv_flush_remote() to flush all caches. 
Note that if there are * dedicated hypervisor driver tiles that have authorized use of their @@ -1181,20 +1379,16 @@ static void __init setup_cpu_maps(void) sizeof(cpu_lotar_map)); if (rc < 0) { pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n"); - cpu_lotar_map = cpu_possible_map; + cpu_lotar_map = *cpu_possible_mask; } -#if CHIP_HAS_CBOX_HOME_MAP() /* Retrieve set of CPUs used for hash-for-home caching */ rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, (HV_VirtAddr) hash_for_home_map.bits, sizeof(hash_for_home_map)); if (rc < 0) early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); - cpumask_or(&cpu_cacheable_map, &cpu_possible_map, &hash_for_home_map); -#else - cpu_cacheable_map = cpu_possible_map; -#endif + cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); } @@ -1254,7 +1448,7 @@ void __init setup_arch(char **cmdline_p) setup_cpu_maps(); -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) /* * Initialize the PCI structures. This is done before memory * setup so that we know whether or not a pci_reserve region @@ -1283,6 +1477,10 @@ void __init setup_arch(char **cmdline_p) * any memory using the bootmem allocator. */ +#ifdef CONFIG_SWIOTLB + swiotlb_init(0); +#endif + paging_init(); setup_numa_mapping(); zone_sizes_init(); @@ -1385,26 +1583,26 @@ void __init setup_per_cpu_areas(void) for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { /* Update the vmalloc mapping and page home. */ - pte_t *ptep = - virt_to_pte(NULL, (unsigned long)ptr + i); + unsigned long addr = (unsigned long)ptr + i; + pte_t *ptep = virt_to_kpte(addr); pte_t pte = *ptep; BUG_ON(pfn != pte_pfn(pte)); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); pte = set_remote_cache_cpu(pte, cpu); - set_pte(ptep, pte); + set_pte_at(&init_mm, addr, ptep, pte); /* Update the lowmem mapping for consistency. 
*/ lowmem_va = (unsigned long)pfn_to_kaddr(pfn); - ptep = virt_to_pte(NULL, lowmem_va); + ptep = virt_to_kpte(lowmem_va); if (pte_huge(*ptep)) { printk(KERN_DEBUG "early shatter of huge page" " at %#lx\n", lowmem_va); shatter_pmd((pmd_t *)ptep); - ptep = virt_to_pte(NULL, lowmem_va); + ptep = virt_to_kpte(lowmem_va); BUG_ON(pte_huge(*ptep)); } BUG_ON(pfn != pte_pfn(*ptep)); - set_pte(ptep, pte); + set_pte_at(&init_mm, lowmem_va, ptep, pte); } } @@ -1433,16 +1631,17 @@ static struct resource code_resource = { }; /* - * We reserve all resources above 4GB so that PCI won't try to put - * mappings above 4GB; the standard allows that for some devices but - * the probing code trunates values to 32 bits. + * On Pro, we reserve all resources above 4GB so that PCI won't try to put + * mappings above 4GB. */ -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) static struct resource* __init insert_non_bus_resource(void) { struct resource *res = kzalloc(sizeof(struct resource), GFP_ATOMIC); + if (!res) + return NULL; res->name = "Non-Bus Physical Address Space"; res->start = (1ULL << 32); res->end = -1LL; @@ -1456,11 +1655,13 @@ insert_non_bus_resource(void) #endif static struct resource* __init -insert_ram_resource(u64 start_pfn, u64 end_pfn) +insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) { struct resource *res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - res->name = "System RAM"; + if (!res) + return NULL; + res->name = reserved ? 
"Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; @@ -1480,10 +1681,9 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn) static int __init request_standard_resources(void) { int i; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; - iomem_resource.end = -1LL; -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) insert_non_bus_resource(); #endif @@ -1491,16 +1691,16 @@ static int __init request_standard_resources(void) u64 start_pfn = node_start_pfn[i]; u64 end_pfn = node_end_pfn[i]; -#ifdef CONFIG_PCI +#if defined(CONFIG_PCI) && !defined(__tilegx__) if (start_pfn <= pci_reserve_start_pfn && end_pfn > pci_reserve_start_pfn) { if (end_pfn > pci_reserve_end_pfn) insert_ram_resource(pci_reserve_end_pfn, - end_pfn); + end_pfn, 0); end_pfn = pci_reserve_start_pfn; } #endif - insert_ram_resource(start_pfn, end_pfn); + insert_ram_resource(start_pfn, end_pfn, 0); } code_resource.start = __pa(_text - CODE_DELTA); @@ -1511,6 +1711,13 @@ static int __init request_standard_resources(void) insert_resource(&iomem_resource, &code_resource); insert_resource(&iomem_resource, &data_resource); + /* Mark any "memmap" regions busy for the resource manager. 
*/ + for (i = 0; i < memmap_nr; ++i) { + struct memmap_entry *m = &memmap_map[i]; + insert_ram_resource(PFN_DOWN(m->addr), + PFN_UP(m->addr + m->size - 1), 1); + } + #ifdef CONFIG_KEXEC insert_resource(&iomem_resource, &crashk_res); #endif diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 1260321155f..d1d026f0126 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -33,20 +33,11 @@ #include <asm/ucontext.h> #include <asm/sigframe.h> #include <asm/syscalls.h> +#include <asm/vdso.h> #include <arch/interrupts.h> #define DEBUG_SIG 0 -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - - -SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, - stack_t __user *, uoss, struct pt_regs *, regs) -{ - return do_sigaltstack(uss, uoss, regs->sp); -} - - /* * Do a signal return; undo the signal stack. */ @@ -78,9 +69,17 @@ int restore_sigcontext(struct pt_regs *regs, return err; } +void signal_fault(const char *type, struct pt_regs *regs, + void __user *frame, int sig) +{ + trace_unhandled_signal(type, regs, (unsigned long)frame, SIGSEGV); + force_sigsegv(sig, current); +} + /* The assembly shim for this function arranges to ignore the return value. 
*/ -SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp); sigset_t set; @@ -90,22 +89,18 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return 0; badframe: - force_sig(SIGSEGV, current); + signal_fault("bad sigreturn frame", regs, frame, 0); return 0; } @@ -190,17 +185,13 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __clear_user(&frame->save_area, sizeof(frame->save_area)); err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); - err |= __put_user((void __user *)(current->sas_ss_sp), - &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto give_sigsegv; - restorer = VDSO_BASE; + restorer = VDSO_SYM(&__vdso_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = (unsigned long) ka->sa.sa_restorer; @@ -219,19 +210,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->regs[1] = (unsigned long) &frame->info; regs->regs[2] = (unsigned long) &frame->uc; regs->flags |= PT_FLAGS_CALLER_SAVES; - - /* - * Notify any tracer that was 
single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - return 0; give_sigsegv: - force_sigsegv(sig, current); + signal_fault("bad setup frame", regs, frame, sig); return -EFAULT; } @@ -239,13 +221,13 @@ give_sigsegv: * OK, we're invoking a handler */ -static int handle_signal(unsigned long sig, siginfo_t *info, - struct k_sigaction *ka, sigset_t *oldset, +static void handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka, struct pt_regs *regs) { + sigset_t *oldset = sigmask_to_save(); int ret; - /* Are we from a system call? */ if (regs->faultnum == INT_SWINT_1) { /* If so, check system call restarting.. */ @@ -276,21 +258,10 @@ static int handle_signal(unsigned long sig, siginfo_t *info, else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret == 0) { - /* This code is only called from system calls or from - * the work_pending path in the return-to-user code, and - * either way we can re-enable interrupts unconditionally. - */ - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, - ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } /* @@ -303,7 +274,6 @@ void do_signal(struct pt_regs *regs) siginfo_t info; int signr; struct k_sigaction ka; - sigset_t *oldset; /* * i386 will check if we're coming from kernel mode and bail out @@ -312,24 +282,10 @@ void do_signal(struct pt_regs *regs) * helpful, we can reinstate the check on "!user_mode(regs)". */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { /* Whee! Actually deliver the signal. 
*/ - if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TS_RESTORE_SIGMASK flag. - */ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - } - + handle_signal(signr, &info, &ka, regs); goto done; } @@ -354,12 +310,123 @@ void do_signal(struct pt_regs *regs) } /* If there's no signal to deliver, just put the saved sigmask back. */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) { - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } + restore_saved_sigmask(); done: /* Avoid double syscall restart if there are nested signals. */ regs->faultnum = INT_SWINT_1_SIGRETURN; } + +int show_unhandled_signals = 1; + +static int __init crashinfo(char *str) +{ + const char *word; + + if (*str == '\0') + show_unhandled_signals = 2; + else if (*str != '=' || kstrtoint(++str, 0, &show_unhandled_signals) != 0) + return 0; + + switch (show_unhandled_signals) { + case 0: + word = "No"; + break; + case 1: + word = "One-line"; + break; + default: + word = "Detailed"; + break; + } + pr_info("%s crash reports will be generated on the console\n", word); + return 1; +} +__setup("crashinfo", crashinfo); + +static void dump_mem(void __user *address) +{ + void __user *addr; + enum { region_size = 256, bytes_per_line = 16 }; + int i, j, k; + int found_readable_mem = 0; + + pr_err("\n"); + if (!access_ok(VERIFY_READ, address, 1)) { + pr_err("Not dumping at address 0x%lx (kernel address)\n", + (unsigned long)address); + return; + } + + addr = (void __user *) + (((unsigned long)address & -bytes_per_line) - region_size/2); + if (addr > address) + addr = NULL; + for (i = 0; i < region_size; + addr += bytes_per_line, i += bytes_per_line) { + unsigned char buf[bytes_per_line]; + char line[100]; + if (copy_from_user(buf, addr, bytes_per_line)) 
+ continue; + if (!found_readable_mem) { + pr_err("Dumping memory around address 0x%lx:\n", + (unsigned long)address); + found_readable_mem = 1; + } + j = sprintf(line, REGFMT":", (unsigned long)addr); + for (k = 0; k < bytes_per_line; ++k) + j += sprintf(&line[j], " %02x", buf[k]); + pr_err("%s\n", line); + } + if (!found_readable_mem) + pr_err("No readable memory around address 0x%lx\n", + (unsigned long)address); +} + +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int sig) +{ + struct task_struct *tsk = current; + + if (show_unhandled_signals == 0) + return; + + /* If the signal is handled, don't show it here. */ + if (!is_global_init(tsk)) { + void __user *handler = + tsk->sighand->action[sig-1].sa.sa_handler; + if (handler != SIG_IGN && handler != SIG_DFL) + return; + } + + /* Rate-limit the one-line output, not the detailed output. */ + if (show_unhandled_signals <= 1 && !printk_ratelimit()) + return; + + printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig); + + print_vma_addr(KERN_CONT " in ", regs->pc); + + printk(KERN_CONT "\n"); + + if (show_unhandled_signals > 1) { + switch (sig) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + pr_err("User crash: signal %d," + " trap %ld, address 0x%lx\n", + sig, regs->faultnum, address); + show_regs(regs); + dump_mem((void __user *)address); + break; + default: + pr_err("User crash: signal %d, trap %ld\n", + sig, regs->faultnum); + break; + } + } +} diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 84a729e06ec..de07fa7d131 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -12,41 +12,30 @@ * more details. * * A code-rewriter that enables instruction single-stepping. - * Derived from iLib's single-stepping code. */ -#ifndef __tilegx__ /* Hardware support for single step unavailable. 
*/ - -/* These functions are only used on the TILE platform */ +#include <linux/smp.h> +#include <linux/ptrace.h> #include <linux/slab.h> #include <linux/thread_info.h> #include <linux/uaccess.h> #include <linux/mman.h> #include <linux/types.h> #include <linux/err.h> +#include <linux/prctl.h> #include <asm/cacheflush.h> -#include <asm/opcode-tile.h> -#include <asm/opcode_constants.h> +#include <asm/traps.h> +#include <asm/uaccess.h> +#include <asm/unaligned.h> #include <arch/abi.h> +#include <arch/spr_def.h> +#include <arch/opcode.h> -#define signExtend17(val) sign_extend((val), 17) -#define TILE_X1_MASK (0xffffffffULL << 31) - -int unaligned_printk; -static int __init setup_unaligned_printk(char *str) -{ - long val; - if (strict_strtol(str, 0, &val) != 0) - return 0; - unaligned_printk = val; - pr_info("Printk for each unaligned data accesses is %s\n", - unaligned_printk ? "enabled" : "disabled"); - return 1; -} -__setup("unaligned_printk=", setup_unaligned_printk); +#ifndef __tilegx__ /* Hardware support for single step unavailable. 
*/ -unsigned int unaligned_fixup_count; +#define signExtend17(val) sign_extend((val), 17) +#define TILE_X1_MASK (0xffffffffULL << 31) enum mem_op { MEMOP_NONE, @@ -56,12 +45,13 @@ enum mem_op { MEMOP_STORE_POSTINCR }; -static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) +static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n, + s32 offset) { - tile_bundle_bits result; + tilepro_bundle_bits result; /* mask out the old offset */ - tile_bundle_bits mask = create_BrOff_X1(-1); + tilepro_bundle_bits mask = create_BrOff_X1(-1); result = n & (~mask); /* or in the new offset */ @@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) return result; } -static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) +static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest, + int src) { - tile_bundle_bits result; - tile_bundle_bits op; + tilepro_bundle_bits result; + tilepro_bundle_bits op; result = n & (~TILE_X1_MASK); @@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) return result; } -static inline tile_bundle_bits nop_X1(tile_bundle_bits n) +static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n) { return move_X1(n, TREG_ZERO, TREG_ZERO); } -static inline tile_bundle_bits addi_X1( - tile_bundle_bits n, int dest, int src, int imm) +static inline tilepro_bundle_bits addi_X1( + tilepro_bundle_bits n, int dest, int src, int imm) { n &= ~TILE_X1_MASK; @@ -107,18 +98,29 @@ static inline tile_bundle_bits addi_X1( return n; } -static tile_bundle_bits rewrite_load_store_unaligned( +static tilepro_bundle_bits rewrite_load_store_unaligned( struct single_step_state *state, - tile_bundle_bits bundle, + tilepro_bundle_bits bundle, struct pt_regs *regs, enum mem_op mem_op, int size, int sign_ext) { unsigned char __user *addr; int val_reg, addr_reg, err, val; + int align_ctl; + + align_ctl = unaligned_fixup; + switch 
(task_thread_info(current)->align_ctl) { + case PR_UNALIGN_NOPRINT: + align_ctl = 1; + break; + case PR_UNALIGN_SIGBUS: + align_ctl = 0; + break; + } /* Get address and value registers */ - if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) { + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { addr_reg = get_SrcA_Y2(bundle); val_reg = get_SrcBDest_Y2(bundle); } else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { @@ -153,9 +155,25 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (((unsigned long)addr % size) == 0) return bundle; -#ifndef __LITTLE_ENDIAN -# error We assume little-endian representation with copy_xx_user size 2 here -#endif + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (align_ctl == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + /* Handle unaligned load/store */ if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { unsigned short val_16; @@ -176,28 +194,31 @@ static tile_bundle_bits rewrite_load_store_unaligned( state->update = 1; } } else { + unsigned short val_16; val = (val_reg == TREG_ZERO) ? 
0 : regs->regs[val_reg]; - err = copy_to_user(addr, &val, size); + switch (size) { + case 2: + val_16 = val; + err = copy_to_user(addr, &val_16, sizeof(val_16)); + break; + case 4: + err = copy_to_user(addr, &val, sizeof(val)); + break; + default: + BUG(); + } } if (err) { siginfo_t info = { - .si_signo = SIGSEGV, - .si_code = SEGV_MAPERR, - .si_addr = addr - }; - force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; - } - - if (unaligned_fixup == 0) { - siginfo_t info = { .si_signo = SIGBUS, .si_code = BUS_ADRALN, .si_addr = addr }; + trace_unhandled_signal("bad address for unaligned fixup", regs, + (unsigned long)addr, SIGBUS); force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; + return (tilepro_bundle_bits) 0; } if (unaligned_printk || unaligned_fixup_count == 0) { @@ -225,7 +246,7 @@ P("\n"); } ++unaligned_fixup_count; - if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) { + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { /* Convert the Y2 instruction to a prefetch. */ bundle &= ~(create_SrcBDest_Y2(-1) | create_Opcode_Y2(-1)); @@ -266,7 +287,7 @@ void single_step_execve(void) ti->step_state = NULL; } -/** +/* * single_step_once() - entry point when single stepping has been triggered. 
* @regs: The machine register state * @@ -285,20 +306,31 @@ void single_step_execve(void) */ void single_step_once(struct pt_regs *regs) { - extern tile_bundle_bits __single_step_ill_insn; - extern tile_bundle_bits __single_step_j_insn; - extern tile_bundle_bits __single_step_addli_insn; - extern tile_bundle_bits __single_step_auli_insn; + extern tilepro_bundle_bits __single_step_ill_insn; + extern tilepro_bundle_bits __single_step_j_insn; + extern tilepro_bundle_bits __single_step_addli_insn; + extern tilepro_bundle_bits __single_step_auli_insn; struct thread_info *info = (void *)current_thread_info(); struct single_step_state *state = info->step_state; int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); - tile_bundle_bits __user *buffer, *pc; - tile_bundle_bits bundle; + tilepro_bundle_bits __user *buffer, *pc; + tilepro_bundle_bits bundle; int temp_reg; int target_reg = TREG_LR; int err; enum mem_op mem_op = MEMOP_NONE; int size = 0, sign_ext = 0; /* happy compiler */ + int align_ctl; + + align_ctl = unaligned_fixup; + switch (task_thread_info(current)->align_ctl) { + case PR_UNALIGN_NOPRINT: + align_ctl = 1; + break; + case PR_UNALIGN_SIGBUS: + align_ctl = 0; + break; + } asm( " .pushsection .rodata.single_step\n" @@ -318,6 +350,14 @@ void single_step_once(struct pt_regs *regs) " .popsection\n" ); + /* + * Enable interrupts here to allow touching userspace and the like. + * The callers expect this: do_trap() already has interrupts + * enabled, and do_work_pending() handles functions that enable + * interrupts internally. 
+ */ + local_irq_enable(); + if (state == NULL) { /* allocate a page of writable, executable memory */ state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL); @@ -327,12 +367,10 @@ void single_step_once(struct pt_regs *regs) } /* allocate a cache line of writable, executable memory */ - down_write(¤t->mm->mmap_sem); - buffer = (void __user *) do_mmap(NULL, 0, 64, + buffer = (void __user *) vm_mmap(NULL, 0, 64, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0); - up_write(¤t->mm->mmap_sem); if (IS_ERR((void __force *)buffer)) { kfree(state); @@ -365,7 +403,7 @@ void single_step_once(struct pt_regs *regs) if (regs->faultnum == INT_SWINT_1) regs->pc -= 8; - pc = (tile_bundle_bits __user *)(regs->pc); + pc = (tilepro_bundle_bits __user *)(regs->pc); if (get_user(bundle, pc) != 0) { pr_err("Couldn't read instruction at %p trying to step\n", pc); return; @@ -377,7 +415,7 @@ void single_step_once(struct pt_regs *regs) state->branch_next_pc = 0; state->update = 0; - if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) { + if (!(bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK)) { /* two wide, check for control flow */ int opcode = get_Opcode_X1(bundle); @@ -508,7 +546,6 @@ void single_step_once(struct pt_regs *regs) } break; -#if CHIP_HAS_WH64() /* postincrement operations */ case IMM_0_OPCODE_X1: switch (get_ImmOpcodeExtension_X1(bundle)) { @@ -543,7 +580,6 @@ void single_step_once(struct pt_regs *regs) break; } break; -#endif /* CHIP_HAS_WH64() */ } if (state->update) { @@ -602,9 +638,9 @@ void single_step_once(struct pt_regs *regs) /* * Check if we need to rewrite an unaligned load/store. - * Returning zero is a special value meaning we need to SIGSEGV. + * Returning zero is a special value meaning we generated a signal. 
*/ - if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { + if (mem_op != MEMOP_NONE && align_ctl >= 0) { bundle = rewrite_load_store_unaligned(state, bundle, regs, mem_op, size, sign_ext); if (bundle == 0) @@ -643,9 +679,9 @@ void single_step_once(struct pt_regs *regs) } /* End with a jump back to the next instruction */ - delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - + delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) - (unsigned long)buffer) >> - TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; + TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; bundle = __single_step_j_insn; bundle |= create_JOffLong_X1(delta); err |= __put_user(bundle, buffer++); @@ -673,9 +709,6 @@ void single_step_once(struct pt_regs *regs) } #else -#include <linux/smp.h> -#include <linux/ptrace.h> -#include <arch/spr_def.h> static DEFINE_PER_CPU(unsigned long, ss_saved_pc); @@ -718,10 +751,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num) } else if ((*ss_pc != regs->pc) || (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { - ptrace_notify(SIGTRAP); control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + send_sigtrap(current, regs); } } diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a4293102ef8..01e8ab29f43 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -20,8 +20,13 @@ #include <linux/irq.h> #include <linux/module.h> #include <asm/cacheflush.h> +#include <asm/homecache.h> -HV_Topology smp_topology __write_once; +/* + * We write to width and height with a single store in head_NN.S, + * so make the variable aligned to "long". + */ +HV_Topology smp_topology __write_once __aligned(sizeof(long)); EXPORT_SYMBOL(smp_topology); #if CHIP_HAS_IPI() @@ -87,25 +92,6 @@ void send_IPI_allbutself(int tag) send_IPI_many(&mask, tag); } - -/* - * Provide smp_call_function_mask, but also run function locally - * if specified in the mask. 
- */ -void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *), - void *info, bool wait) -{ - int cpu = get_cpu(); - smp_call_function_many(mask, func, info, wait); - if (cpumask_test_cpu(cpu, mask)) { - local_irq_disable(); - func(info); - local_irq_enable(); - } - put_cpu(); -} - - /* * Functions related to starting/stopping cpus. */ @@ -119,10 +105,10 @@ static void smp_start_cpu_interrupt(void) /* Handler to stop the current cpu. */ static void smp_stop_cpu_interrupt(void) { - set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); + set_cpu_online(smp_processor_id(), 0); for (;;) - asm("nap"); + asm("nap; nop"); } /* This function calls the 'stop' function on all other CPUs in the system. */ @@ -132,6 +118,12 @@ void smp_send_stop(void) send_IPI_allbutself(MSG_TAG_STOP_CPU); } +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} /* * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. @@ -180,21 +172,24 @@ static void ipi_flush_icache_range(void *info) void flush_icache_range(unsigned long start, unsigned long end) { struct ipi_flush flush = { start, end }; - preempt_disable(); - on_each_cpu(ipi_flush_icache_range, &flush, 1); - preempt_enable(); + + /* If invoked with irqs disabled, we can not issue IPIs. */ + if (irqs_disabled()) + flush_remote(0, HV_FLUSH_EVICT_L1I, NULL, 0, 0, 0, + NULL, NULL, 0); + else { + preempt_disable(); + on_each_cpu(ipi_flush_icache_range, &flush, 1); + preempt_enable(); + } } /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - /* - * Nothing to do here; when we return from interrupt, the - * rescheduling will occur there. But do bump the interrupt - * profiler count in the meantime. 
- */ __get_cpu_var(irq_stat).irq_resched_count++; + scheduler_ipi(); return IRQ_HANDLED; } @@ -220,7 +215,7 @@ void __init ipi_init(void) if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) panic("Failed to initialize IPI for cpu %d\n", cpu); - offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; + offset = PFN_PHYS(pte_pfn(pte)); ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); } #endif diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index b949edcec20..732e9d13866 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -133,22 +133,24 @@ static __init int reset_init_affinity(void) } late_initcall(reset_init_affinity); -static struct cpumask cpu_started __cpuinitdata; +static struct cpumask cpu_started; /* * Activate a secondary processor. Very minimal; don't add anything * to this path without knowing what you're doing, since SMP booting * is pretty fragile. */ -static void __cpuinit start_secondary(void) +static void start_secondary(void) { - int cpuid = smp_processor_id(); + int cpuid; + + preempt_disable(); + + cpuid = smp_processor_id(); /* Set our thread pointer appropriately. */ set_my_cpu_offset(__per_cpu_offset[cpuid]); - preempt_disable(); - /* * In large machines even this will slow us down, since we * will be contending for for the printk spinlock. @@ -183,7 +185,7 @@ static void __cpuinit start_secondary(void) /* * Bring a secondary processor online. */ -void __cpuinit online_secondary(void) +void online_secondary(void) { /* * low-memory mappings have been cleared, flush them from @@ -196,17 +198,9 @@ void __cpuinit online_secondary(void) /* This must be done before setting cpu_online_mask */ wmb(); - /* - * We need to hold call_lock, so there is no inconsistency - * between the time smp_call_function() determines number of - * IPI recipients, and the time when the determination is made - * for which cpus receive the IPI. 
Holding this - * lock helps us to not include this cpu in a currently in progress - * smp_call_function(). - */ - ipi_call_lock(); + notify_cpu_starting(smp_processor_id()); + set_cpu_online(smp_processor_id(), 1); - ipi_call_unlock(); __get_cpu_var(cpu_state) = CPU_ONLINE; /* Set up tile-specific state for this cpu. */ @@ -215,12 +209,10 @@ void __cpuinit online_secondary(void) /* Set up tile-timer clock-event device on this cpu */ setup_tile_timer(); - preempt_enable(); - - cpu_idle(); + cpu_startup_entry(CPUHP_ONLINE); } -int __cpuinit __cpu_up(unsigned int cpu) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { /* Wait 5s total for all CPUs for them to come online */ static int timeout; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index dd81713a90d..c93977a6211 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -21,12 +21,16 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> +#include <linux/string.h> #include <asm/backtrace.h> #include <asm/page.h> -#include <asm/tlbflush.h> #include <asm/ucontext.h> +#include <asm/switch_to.h> #include <asm/sigframe.h> #include <asm/stack.h> +#include <asm/vdso.h> #include <arch/abi.h> #include <arch/interrupts.h> @@ -36,7 +40,7 @@ #define KBT_LOOP 3 /* Backtrace entered a loop */ /* Is address on the specified kernel stack? */ -static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) +static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) { ulong kstack_base = (ulong) kbt->task->stack; if (kstack_base == 0) /* corrupt task pointer; just follow stack... */ @@ -44,72 +48,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address valid for reading? 
*/ -static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) -{ - HV_PTE *l1_pgtable = kbt->pgtable; - HV_PTE *l2_pgtable; - unsigned long pfn; - HV_PTE pte; - struct page *page; - - if (l1_pgtable == NULL) - return 0; /* can't read user space in other tasks */ - -#ifdef CONFIG_64BIT - /* Find the real l1_pgtable by looking in the l0_pgtable. */ - pte = l1_pgtable[HV_L0_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("L0 huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - page = pfn_to_page(pfn); - BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ - l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); -#endif - pte = l1_pgtable[HV_L1_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - - page = pfn_to_page(pfn); - if (PageHighMem(page)) { - pr_err("L2 page table not in LOWMEM (%#llx)\n", - HV_PFN_TO_CPA(pfn)); - return 0; - } - l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); - pte = l2_pgtable[HV_L2_INDEX(address)]; - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); -} - /* Callback for backtracer; basically a glorified memcpy */ -static bool read_memory_func(void *result, VirtualAddress address, +static bool read_memory_func(void *result, unsigned long address, unsigned int size, void *vkbt) { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; if (__kernel_text_address(address)) { /* OK to read kernel code. 
*/ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ if (!in_kernel_stack(kbt, address)) return 0; - } else if (!valid_address(kbt, address)) { - return 0; /* invalid user-space address */ + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ } pagefault_disable(); retval = __copy_from_user_inatomic(result, @@ -124,9 +79,11 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) { const char *fault = NULL; /* happy compiler */ char fault_buf[64]; - VirtualAddress sp = kbt->it.sp; + unsigned long sp = kbt->it.sp; struct pt_regs *p; + if (sp % sizeof(long) != 0) + return NULL; if (!in_kernel_stack(kbt, sp)) return NULL; if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) @@ -147,9 +104,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) p->sp >= sp) { if (kbt->verbose) pr_err(" <%s while in kernel mode>\n", fault); - } else if (EX1_PL(p->ex1) == USER_PL && - p->pc < PAGE_OFFSET && - p->sp < PAGE_OFFSET) { + } else if (user_mode(p) && + p->sp < PAGE_OFFSET && p->sp != 0) { if (kbt->verbose) pr_err(" <%s while in user mode>\n", fault); } else if (kbt->verbose) { @@ -157,39 +113,39 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) p->pc, p->sp, p->ex1); p = NULL; } - if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) + if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0) return p; return NULL; } /* Is the pc pointing to a sigreturn trampoline? 
*/ -static int is_sigreturn(VirtualAddress pc) +static int is_sigreturn(unsigned long pc) { - return (pc == VDSO_BASE); + return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn)); } /* Return a pt_regs pointer for a valid signal handler frame */ -static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE) { - struct rt_sigframe *frame; - unsigned long sigframe_top = - b->sp + sizeof(struct rt_sigframe) - 1; - if (!valid_address(kbt, b->sp) || - !valid_address(kbt, sigframe_top)) { - if (kbt->verbose) - pr_err(" (odd signal: sp %#lx?)\n", - (unsigned long)(b->sp)); + if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) return NULL; - } - frame = (struct rt_sigframe *)b->sp; if (kbt->verbose) { pr_err(" <received signal %d>\n", - frame->info.si_signo); + kframe->info.si_signo); } - return (struct pt_regs *)&frame->uc.uc_mcontext; + return (struct pt_regs *)&kframe->uc.uc_mcontext; } return NULL; } @@ -202,10 +158,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; + struct rt_sigframe kframe; p = valid_fault_handler(kbt); if (p == NULL) - p = valid_sigframe(kbt); + p = valid_sigframe(kbt, &kframe); if (p == NULL) return 0; backtrace_init(&kbt->it, read_memory_func, kbt, @@ -239,67 +196,45 @@ static int KBacktraceIterator_next_item_inclusive( */ static void validate_stack(struct pt_regs *regs) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); unsigned long ksp0 = get_current_ksp0(); - unsigned long 
ksp0_base = ksp0 - THREAD_SIZE; + unsigned long ksp0_base = ksp0 & -THREAD_SIZE; unsigned long sp = stack_pointer; if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) { - pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } else if (sp < ksp0_base + sizeof(struct thread_info)) { - pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n" + pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n" " sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n", - cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr); + cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr); } } void KBacktraceIterator_init(struct KBacktraceIterator *kbt, struct task_struct *t, struct pt_regs *regs) { - VirtualAddress pc, lr, sp, r52; + unsigned long pc, lr, sp, r52; int is_current; /* * Set up callback information. We grab the kernel stack base - * so we will allow reads of that address range, and if we're - * asking about the current process we grab the page table - * so we can check user accesses before trying to read them. - * We flush the TLB to avoid any weird skew issues. + * so we will allow reads of that address range. */ - is_current = (t == NULL); + is_current = (t == NULL || t == current); kbt->is_current = is_current; if (is_current) t = validate_current(); kbt->task = t; - kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ kbt->end = KBT_ONGOING; - kbt->new_context = 0; - if (is_current) { - HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; - if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) { - /* - * Not just an optimization: this also allows - * this to work at all before va/pa mappings - * are set up. 
- */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -331,7 +266,7 @@ EXPORT_SYMBOL(KBacktraceIterator_end); void KBacktraceIterator_next(struct KBacktraceIterator *kbt) { - VirtualAddress old_pc = kbt->it.pc, old_sp = kbt->it.sp; + unsigned long old_pc = kbt->it.pc, old_sp = kbt->it.sp; kbt->new_context = 0; if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) { kbt->end = KBT_DONE; @@ -345,6 +280,95 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. 
*/ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + name = kbasename(p); + } else { + name = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(name); + remaining = (bufsize - 1) - namelen; + memmove(buf, name, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + +/* + * Avoid possible crash recursion during backtrace. If it happens, it + * makes it easy to lose the actual root cause of the failure, so we + * put a simple guard on all the backtrace loops. + */ +static bool start_backtrace(void) +{ + if (current->thread.in_backtrace) { + pr_err("Backtrace requested while in backtrace!\n"); + return false; + } + current->thread.in_backtrace = true; + return true; +} + +static void end_backtrace(void) +{ + current->thread.in_backtrace = false; +} + /* * This method wraps the backtracer's more generic support. 
* It is only invoked from the architecture-specific code; show_stack() @@ -353,7 +377,10 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; + if (!start_backtrace()) + return; if (headers) { /* * Add a blank line since if we are called from panic(), @@ -364,36 +391,21 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Starting stack dump of tid %d, pid %d (%s)" " on cpu %d at cycle %lld\n", kbt->task->pid, kbt->task->tgid, kbt->task->comm, - smp_processor_id(), get_cycles()); + raw_smp_processor_id(), get_cycles()); } kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. 
*/ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", i++, address, namebuf, (unsigned long)(kbt->it.sp)); @@ -408,6 +420,9 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); + end_backtrace(); } EXPORT_SYMBOL(tile_show_stack); @@ -448,7 +463,7 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc, regs_to_pt_regs(®s, pc, lr, sp, r52)); } -/* This is called only from kernel/sched.c, with esp == NULL */ +/* This is called only from kernel/sched/core.c, with esp == NULL */ void show_stack(struct task_struct *task, unsigned long *esp) { struct KBacktraceIterator kbt; @@ -469,6 +484,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) int skip = trace->skip; int i = 0; + if (!start_backtrace()) + goto done; if (task == NULL || task == current) KBacktraceIterator_init_current(&kbt); else @@ -482,6 +499,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace) break; trace->entries[i++] = kbt.it.pc; } + end_backtrace(); +done: trace->nr_entries = i; } EXPORT_SYMBOL(save_stack_trace_tsk); @@ -490,6 +509,7 @@ void save_stack_trace(struct stack_trace *trace) { save_stack_trace_tsk(NULL, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index e2187d24a9b..38debe70606 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -32,11 +32,19 @@ #include <asm/syscalls.h> #include <asm/pgtable.h> #include <asm/homecache.h> +#include <asm/cachectl.h> #include <arch/chip.h> -SYSCALL_DEFINE0(flush_cache) +SYSCALL_DEFINE3(cacheflush, unsigned long, 
addr, unsigned long, len, + unsigned long, flags) { - homecache_evict(cpumask_of(smp_processor_id())); + /* DCACHE is not particularly effective if not bound to one cpu. */ + if (flags & DCACHE) + homecache_evict(cpumask_of(raw_smp_processor_id())); + + if (flags & ICACHE) + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm), + 0, 0, 0, NULL, NULL, 0); return 0; } @@ -56,13 +64,6 @@ ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count) return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count); } -long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi, - u32 len, int advice) -{ - return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo, - len, advice); -} - int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi, int advice) { @@ -103,20 +104,14 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #ifndef __tilegx__ /* See comments at the top of the file. */ -#define sys_fadvise64 sys32_fadvise64 #define sys_fadvise64_64 sys32_fadvise64_64 #define sys_readahead sys32_readahead -#define sys_sync_file_range sys_sync_file_range2 #endif -/* Call the trampolines to manage pt_regs where necessary. */ -#define sys_execve _sys_execve -#define sys_sigaltstack _sys_sigaltstack +/* Call the assembly trampolines where necessary. */ +#undef sys_rt_sigreturn #define sys_rt_sigreturn _sys_rt_sigreturn #define sys_clone _sys_clone -#ifndef __tilegx__ -#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr -#endif /* * Note that we can't include <linux/unistd.h> here since the header diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 00000000000..a3ed12f8f83 --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,269 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include <linux/device.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/stat.h> +#include <hv/hypervisor.h> + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static DEVICE_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static DEVICE_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static DEVICE_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static DEVICE_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + return 
sprintf(page, "tilera\n"); +} +static DEVICE_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static DEVICE_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(cpumod_part, HV_CONFSTR_CPUMOD_PART_NUM) +HV_CONF_ATTR(cpumod_serial, HV_CONFSTR_CPUMOD_SERIAL_NUM) +HV_CONF_ATTR(cpumod_revision, HV_CONFSTR_CPUMOD_REV) +HV_CONF_ATTR(cpumod_description,HV_CONFSTR_CPUMOD_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &dev_attr_board_part.attr, + &dev_attr_board_serial.attr, + &dev_attr_board_revision.attr, + &dev_attr_board_description.attr, + &dev_attr_mezz_part.attr, + &dev_attr_mezz_serial.attr, + &dev_attr_mezz_revision.attr, + &dev_attr_mezz_description.attr, + &dev_attr_cpumod_part.attr, + &dev_attr_cpumod_serial.attr, + &dev_attr_cpumod_revision.attr, + &dev_attr_cpumod_description.attr, + &dev_attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing 
NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. */ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static ssize_t hv_stats_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + int cpu = dev->id; + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, + (unsigned long)page, PAGE_SIZE - 1, + lotar, 0); + n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1); + page[n] = '\0'; + return n; +} + +static ssize_t hv_stats_store(struct device *dev, + struct device_attribute *attr, + const char *page, + size_t count) +{ + int cpu = dev->id; + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, 0, 0, lotar, 1); + return n < 0 ? 
n : count; +} + +static DEVICE_ATTR(hv_stats, 0644, hv_stats_show, hv_stats_store); + +static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif) +{ + int err, cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + err = sysfs_create_file(&dev->kobj, &dev_attr_hv_stats.attr); + + return err; +} + +static int hv_stats_device_remove(struct device *dev, + struct subsys_interface *sif) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + + sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr); + return 0; +} + + +static struct subsys_interface hv_stats_interface = { + .name = "hv_stats", + .subsys = &cpu_subsys, + .add_dev = hv_stats_device_add, + .remove_dev = hv_stats_device_remove, +}; + +static int __init create_sysfs_entries(void) +{ + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = device_create_file(cpu_subsys.dev_root, &dev_attr_##name); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &dev_attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + if (!err) { + /* + * Don't bother adding the hv_stats files on each CPU if + * our hypervisor doesn't supply statistics. 
+ */ + int cpu = raw_smp_processor_id(); + long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu)); + char dummy; + ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, + (unsigned long) &dummy, 1, + lotar, 0); + if (n >= 0) + err = subsys_interface_register(&hv_stats_interface); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/arch/tile/kernel/tile-desc_32.c b/arch/tile/kernel/tile-desc_32.c index 69af0e150f7..dd7bd1d8563 100644 --- a/arch/tile/kernel/tile-desc_32.c +++ b/arch/tile/kernel/tile-desc_32.c @@ -1,3 +1,23 @@ +/* TILEPro opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + /* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ #define BFD_RELOC(x) -1 @@ -6,1217 +26,1217 @@ #define TREG_SN 56 #define TREG_ZERO 63 -/* FIXME: Rename this. 
*/ -#include <asm/opcode-tile.h> - #include <linux/stddef.h> +#include <asm/tile-desc.h> -const struct tile_opcode tile_opcodes[395] = +const struct tilepro_opcode tilepro_opcodes[395] = { - { "bpt", TILE_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { "bpt", TILEPRO_OPC_BPT, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "info", TILE_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { "info", TILEPRO_OPC_INFO, 0xf, 1, TREG_ZERO, 1, { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, }, - { "infol", TILE_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { "infol", TILEPRO_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, }, - { "j", TILE_OPC_J, 0x2, 1, TREG_ZERO, 1, + { "j", TILEPRO_OPC_J, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, }, - { "jal", TILE_OPC_JAL, 0x2, 1, TREG_LR, 1, + { "jal", TILEPRO_OPC_JAL, 0x2, 1, TREG_LR, 1, { { 0, }, { 6 }, { 0, }, { 0, }, { 0, } }, }, - { "move", TILE_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { "move", TILEPRO_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, { { 7, 8 }, { 9, 10 }, { 11, 12 }, { 13, 14 }, { 0, } }, }, - { "move.sn", TILE_OPC_MOVE_SN, 0x3, 2, TREG_SN, 1, + { "move.sn", TILEPRO_OPC_MOVE_SN, 0x3, 2, TREG_SN, 1, { { 7, 8 }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "movei", TILE_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { "movei", TILEPRO_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, { { 7, 0 }, { 9, 1 }, { 11, 2 }, { 13, 3 }, { 0, } }, }, - { "movei.sn", TILE_OPC_MOVEI_SN, 0x3, 2, TREG_SN, 1, + { "movei.sn", TILEPRO_OPC_MOVEI_SN, 0x3, 2, TREG_SN, 1, { { 7, 0 }, { 9, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "moveli", TILE_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { "moveli", TILEPRO_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "moveli.sn", TILE_OPC_MOVELI_SN, 0x3, 2, TREG_SN, 1, + { "moveli.sn", TILEPRO_OPC_MOVELI_SN, 0x3, 2, TREG_SN, 1, { { 7, 4 }, { 9, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "movelis", TILE_OPC_MOVELIS, 0x3, 2, TREG_SN, 1, + { "movelis", TILEPRO_OPC_MOVELIS, 0x3, 2, TREG_SN, 1, { { 7, 4 }, { 9, 
5 }, { 0, }, { 0, }, { 0, } }, }, - { "prefetch", TILE_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { "prefetch", TILEPRO_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 15 } }, }, - { "raise", TILE_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { "raise", TILEPRO_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "add", TILE_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { "add", TILEPRO_OPC_ADD, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "add.sn", TILE_OPC_ADD_SN, 0x3, 3, TREG_SN, 1, + { "add.sn", TILEPRO_OPC_ADD_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addb", TILE_OPC_ADDB, 0x3, 3, TREG_ZERO, 1, + { "addb", TILEPRO_OPC_ADDB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addb.sn", TILE_OPC_ADDB_SN, 0x3, 3, TREG_SN, 1, + { "addb.sn", TILEPRO_OPC_ADDB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addbs_u", TILE_OPC_ADDBS_U, 0x3, 3, TREG_ZERO, 1, + { "addbs_u", TILEPRO_OPC_ADDBS_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addbs_u.sn", TILE_OPC_ADDBS_U_SN, 0x3, 3, TREG_SN, 1, + { "addbs_u.sn", TILEPRO_OPC_ADDBS_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addh", TILE_OPC_ADDH, 0x3, 3, TREG_ZERO, 1, + { "addh", TILEPRO_OPC_ADDH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addh.sn", TILE_OPC_ADDH_SN, 0x3, 3, TREG_SN, 1, + { "addh.sn", TILEPRO_OPC_ADDH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addhs", TILE_OPC_ADDHS, 0x3, 3, TREG_ZERO, 1, + { "addhs", TILEPRO_OPC_ADDHS, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addhs.sn", TILE_OPC_ADDHS_SN, 0x3, 3, TREG_SN, 1, + { "addhs.sn", TILEPRO_OPC_ADDHS_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 
10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "addi", TILE_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { "addi", TILEPRO_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "addi.sn", TILE_OPC_ADDI_SN, 0x3, 3, TREG_SN, 1, + { "addi.sn", TILEPRO_OPC_ADDI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "addib", TILE_OPC_ADDIB, 0x3, 3, TREG_ZERO, 1, + { "addib", TILEPRO_OPC_ADDIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "addib.sn", TILE_OPC_ADDIB_SN, 0x3, 3, TREG_SN, 1, + { "addib.sn", TILEPRO_OPC_ADDIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "addih", TILE_OPC_ADDIH, 0x3, 3, TREG_ZERO, 1, + { "addih", TILEPRO_OPC_ADDIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "addih.sn", TILE_OPC_ADDIH_SN, 0x3, 3, TREG_SN, 1, + { "addih.sn", TILEPRO_OPC_ADDIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "addli", TILE_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { "addli", TILEPRO_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "addli.sn", TILE_OPC_ADDLI_SN, 0x3, 3, TREG_SN, 1, + { "addli.sn", TILEPRO_OPC_ADDLI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "addlis", TILE_OPC_ADDLIS, 0x3, 3, TREG_SN, 1, + { "addlis", TILEPRO_OPC_ADDLIS, 0x3, 3, TREG_SN, 1, { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "adds", TILE_OPC_ADDS, 0x3, 3, TREG_ZERO, 1, + { "adds", TILEPRO_OPC_ADDS, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "adds.sn", TILE_OPC_ADDS_SN, 0x3, 3, TREG_SN, 1, + { "adds.sn", TILEPRO_OPC_ADDS_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "adiffb_u", TILE_OPC_ADIFFB_U, 0x1, 3, TREG_ZERO, 1, + { "adiffb_u", TILEPRO_OPC_ADIFFB_U, 0x1, 3, TREG_ZERO, 1, { { 7, 
8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "adiffb_u.sn", TILE_OPC_ADIFFB_U_SN, 0x1, 3, TREG_SN, 1, + { "adiffb_u.sn", TILEPRO_OPC_ADIFFB_U_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "adiffh", TILE_OPC_ADIFFH, 0x1, 3, TREG_ZERO, 1, + { "adiffh", TILEPRO_OPC_ADIFFH, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "adiffh.sn", TILE_OPC_ADIFFH_SN, 0x1, 3, TREG_SN, 1, + { "adiffh.sn", TILEPRO_OPC_ADIFFH_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "and", TILE_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { "and", TILEPRO_OPC_AND, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "and.sn", TILE_OPC_AND_SN, 0x3, 3, TREG_SN, 1, + { "and.sn", TILEPRO_OPC_AND_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "andi", TILE_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { "andi", TILEPRO_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "andi.sn", TILE_OPC_ANDI_SN, 0x3, 3, TREG_SN, 1, + { "andi.sn", TILEPRO_OPC_ANDI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "auli", TILE_OPC_AULI, 0x3, 3, TREG_ZERO, 1, + { "auli", TILEPRO_OPC_AULI, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 4 }, { 9, 10, 5 }, { 0, }, { 0, }, { 0, } }, }, - { "avgb_u", TILE_OPC_AVGB_U, 0x1, 3, TREG_ZERO, 1, + { "avgb_u", TILEPRO_OPC_AVGB_U, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "avgb_u.sn", TILE_OPC_AVGB_U_SN, 0x1, 3, TREG_SN, 1, + { "avgb_u.sn", TILEPRO_OPC_AVGB_U_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "avgh", TILE_OPC_AVGH, 0x1, 3, TREG_ZERO, 1, + { "avgh", TILEPRO_OPC_AVGH, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "avgh.sn", TILE_OPC_AVGH_SN, 0x1, 3, TREG_SN, 1, + { "avgh.sn", TILEPRO_OPC_AVGH_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 
16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "bbns", TILE_OPC_BBNS, 0x2, 2, TREG_ZERO, 1, + { "bbns", TILEPRO_OPC_BBNS, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbns.sn", TILE_OPC_BBNS_SN, 0x2, 2, TREG_SN, 1, + { "bbns.sn", TILEPRO_OPC_BBNS_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbnst", TILE_OPC_BBNST, 0x2, 2, TREG_ZERO, 1, + { "bbnst", TILEPRO_OPC_BBNST, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbnst.sn", TILE_OPC_BBNST_SN, 0x2, 2, TREG_SN, 1, + { "bbnst.sn", TILEPRO_OPC_BBNST_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbs", TILE_OPC_BBS, 0x2, 2, TREG_ZERO, 1, + { "bbs", TILEPRO_OPC_BBS, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbs.sn", TILE_OPC_BBS_SN, 0x2, 2, TREG_SN, 1, + { "bbs.sn", TILEPRO_OPC_BBS_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbst", TILE_OPC_BBST, 0x2, 2, TREG_ZERO, 1, + { "bbst", TILEPRO_OPC_BBST, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bbst.sn", TILE_OPC_BBST_SN, 0x2, 2, TREG_SN, 1, + { "bbst.sn", TILEPRO_OPC_BBST_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgez", TILE_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { "bgez", TILEPRO_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgez.sn", TILE_OPC_BGEZ_SN, 0x2, 2, TREG_SN, 1, + { "bgez.sn", TILEPRO_OPC_BGEZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgezt", TILE_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { "bgezt", TILEPRO_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgezt.sn", TILE_OPC_BGEZT_SN, 0x2, 2, TREG_SN, 1, + { "bgezt.sn", TILEPRO_OPC_BGEZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgz", TILE_OPC_BGZ, 0x2, 2, TREG_ZERO, 1, + { "bgz", TILEPRO_OPC_BGZ, 
0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgz.sn", TILE_OPC_BGZ_SN, 0x2, 2, TREG_SN, 1, + { "bgz.sn", TILEPRO_OPC_BGZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgzt", TILE_OPC_BGZT, 0x2, 2, TREG_ZERO, 1, + { "bgzt", TILEPRO_OPC_BGZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bgzt.sn", TILE_OPC_BGZT_SN, 0x2, 2, TREG_SN, 1, + { "bgzt.sn", TILEPRO_OPC_BGZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bitx", TILE_OPC_BITX, 0x5, 2, TREG_ZERO, 1, + { "bitx", TILEPRO_OPC_BITX, 0x5, 2, TREG_ZERO, 1, { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, }, - { "bitx.sn", TILE_OPC_BITX_SN, 0x1, 2, TREG_SN, 1, + { "bitx.sn", TILEPRO_OPC_BITX_SN, 0x1, 2, TREG_SN, 1, { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "blez", TILE_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { "blez", TILEPRO_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blez.sn", TILE_OPC_BLEZ_SN, 0x2, 2, TREG_SN, 1, + { "blez.sn", TILEPRO_OPC_BLEZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blezt", TILE_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { "blezt", TILEPRO_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blezt.sn", TILE_OPC_BLEZT_SN, 0x2, 2, TREG_SN, 1, + { "blezt.sn", TILEPRO_OPC_BLEZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blz", TILE_OPC_BLZ, 0x2, 2, TREG_ZERO, 1, + { "blz", TILEPRO_OPC_BLZ, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blz.sn", TILE_OPC_BLZ_SN, 0x2, 2, TREG_SN, 1, + { "blz.sn", TILEPRO_OPC_BLZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blzt", TILE_OPC_BLZT, 0x2, 2, TREG_ZERO, 1, + { "blzt", TILEPRO_OPC_BLZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "blzt.sn", TILE_OPC_BLZT_SN, 0x2, 2, TREG_SN, 1, + { 
"blzt.sn", TILEPRO_OPC_BLZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bnz", TILE_OPC_BNZ, 0x2, 2, TREG_ZERO, 1, + { "bnz", TILEPRO_OPC_BNZ, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bnz.sn", TILE_OPC_BNZ_SN, 0x2, 2, TREG_SN, 1, + { "bnz.sn", TILEPRO_OPC_BNZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bnzt", TILE_OPC_BNZT, 0x2, 2, TREG_ZERO, 1, + { "bnzt", TILEPRO_OPC_BNZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bnzt.sn", TILE_OPC_BNZT_SN, 0x2, 2, TREG_SN, 1, + { "bnzt.sn", TILEPRO_OPC_BNZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bytex", TILE_OPC_BYTEX, 0x5, 2, TREG_ZERO, 1, + { "bytex", TILEPRO_OPC_BYTEX, 0x5, 2, TREG_ZERO, 1, { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, }, - { "bytex.sn", TILE_OPC_BYTEX_SN, 0x1, 2, TREG_SN, 1, + { "bytex.sn", TILEPRO_OPC_BYTEX_SN, 0x1, 2, TREG_SN, 1, { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "bz", TILE_OPC_BZ, 0x2, 2, TREG_ZERO, 1, + { "bz", TILEPRO_OPC_BZ, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bz.sn", TILE_OPC_BZ_SN, 0x2, 2, TREG_SN, 1, + { "bz.sn", TILEPRO_OPC_BZ_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bzt", TILE_OPC_BZT, 0x2, 2, TREG_ZERO, 1, + { "bzt", TILEPRO_OPC_BZT, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "bzt.sn", TILE_OPC_BZT_SN, 0x2, 2, TREG_SN, 1, + { "bzt.sn", TILEPRO_OPC_BZT_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 10, 20 }, { 0, }, { 0, }, { 0, } }, }, - { "clz", TILE_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { "clz", TILEPRO_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, }, - { "clz.sn", TILE_OPC_CLZ_SN, 0x1, 2, TREG_SN, 1, + { "clz.sn", TILEPRO_OPC_CLZ_SN, 0x1, 2, TREG_SN, 1, { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "crc32_32", TILE_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, 
+ { "crc32_32", TILEPRO_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "crc32_32.sn", TILE_OPC_CRC32_32_SN, 0x1, 3, TREG_SN, 1, + { "crc32_32.sn", TILEPRO_OPC_CRC32_32_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "crc32_8", TILE_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { "crc32_8", TILEPRO_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "crc32_8.sn", TILE_OPC_CRC32_8_SN, 0x1, 3, TREG_SN, 1, + { "crc32_8.sn", TILEPRO_OPC_CRC32_8_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "ctz", TILE_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { "ctz", TILEPRO_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, }, - { "ctz.sn", TILE_OPC_CTZ_SN, 0x1, 2, TREG_SN, 1, + { "ctz.sn", TILEPRO_OPC_CTZ_SN, 0x1, 2, TREG_SN, 1, { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "drain", TILE_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { "drain", TILEPRO_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "dtlbpr", TILE_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { "dtlbpr", TILEPRO_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "dword_align", TILE_OPC_DWORD_ALIGN, 0x1, 3, TREG_ZERO, 1, + { "dword_align", TILEPRO_OPC_DWORD_ALIGN, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "dword_align.sn", TILE_OPC_DWORD_ALIGN_SN, 0x1, 3, TREG_SN, 1, + { "dword_align.sn", TILEPRO_OPC_DWORD_ALIGN_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "finv", TILE_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { "finv", TILEPRO_OPC_FINV, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "flush", TILE_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { "flush", TILEPRO_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "fnop", TILE_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { "fnop", TILEPRO_OPC_FNOP, 0xf, 
0, TREG_ZERO, 1, { { }, { }, { }, { }, { 0, } }, }, - { "icoh", TILE_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { "icoh", TILEPRO_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "ill", TILE_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { "ill", TILEPRO_OPC_ILL, 0xa, 0, TREG_ZERO, 1, { { 0, }, { }, { 0, }, { }, { 0, } }, }, - { "inthb", TILE_OPC_INTHB, 0x3, 3, TREG_ZERO, 1, + { "inthb", TILEPRO_OPC_INTHB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "inthb.sn", TILE_OPC_INTHB_SN, 0x3, 3, TREG_SN, 1, + { "inthb.sn", TILEPRO_OPC_INTHB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "inthh", TILE_OPC_INTHH, 0x3, 3, TREG_ZERO, 1, + { "inthh", TILEPRO_OPC_INTHH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "inthh.sn", TILE_OPC_INTHH_SN, 0x3, 3, TREG_SN, 1, + { "inthh.sn", TILEPRO_OPC_INTHH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "intlb", TILE_OPC_INTLB, 0x3, 3, TREG_ZERO, 1, + { "intlb", TILEPRO_OPC_INTLB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "intlb.sn", TILE_OPC_INTLB_SN, 0x3, 3, TREG_SN, 1, + { "intlb.sn", TILEPRO_OPC_INTLB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "intlh", TILE_OPC_INTLH, 0x3, 3, TREG_ZERO, 1, + { "intlh", TILEPRO_OPC_INTLH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "intlh.sn", TILE_OPC_INTLH_SN, 0x3, 3, TREG_SN, 1, + { "intlh.sn", TILEPRO_OPC_INTLH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "inv", TILE_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { "inv", TILEPRO_OPC_INV, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "iret", TILE_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { "iret", TILEPRO_OPC_IRET, 0x2, 0, TREG_ZERO, 1, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "jalb", 
TILE_OPC_JALB, 0x2, 1, TREG_LR, 1, + { "jalb", TILEPRO_OPC_JALB, 0x2, 1, TREG_LR, 1, { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, }, - { "jalf", TILE_OPC_JALF, 0x2, 1, TREG_LR, 1, + { "jalf", TILEPRO_OPC_JALF, 0x2, 1, TREG_LR, 1, { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, }, - { "jalr", TILE_OPC_JALR, 0x2, 1, TREG_LR, 1, + { "jalr", TILEPRO_OPC_JALR, 0x2, 1, TREG_LR, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "jalrp", TILE_OPC_JALRP, 0x2, 1, TREG_LR, 1, + { "jalrp", TILEPRO_OPC_JALRP, 0x2, 1, TREG_LR, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "jb", TILE_OPC_JB, 0x2, 1, TREG_ZERO, 1, + { "jb", TILEPRO_OPC_JB, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, }, - { "jf", TILE_OPC_JF, 0x2, 1, TREG_ZERO, 1, + { "jf", TILEPRO_OPC_JF, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 22 }, { 0, }, { 0, }, { 0, } }, }, - { "jr", TILE_OPC_JR, 0x2, 1, TREG_ZERO, 1, + { "jr", TILEPRO_OPC_JR, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "jrp", TILE_OPC_JRP, 0x2, 1, TREG_ZERO, 1, + { "jrp", TILEPRO_OPC_JRP, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lb", TILE_OPC_LB, 0x12, 2, TREG_ZERO, 1, + { "lb", TILEPRO_OPC_LB, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, }, - { "lb.sn", TILE_OPC_LB_SN, 0x2, 2, TREG_SN, 1, + { "lb.sn", TILEPRO_OPC_LB_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lb_u", TILE_OPC_LB_U, 0x12, 2, TREG_ZERO, 1, + { "lb_u", TILEPRO_OPC_LB_U, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, }, - { "lb_u.sn", TILE_OPC_LB_U_SN, 0x2, 2, TREG_SN, 1, + { "lb_u.sn", TILEPRO_OPC_LB_U_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lbadd", TILE_OPC_LBADD, 0x2, 3, TREG_ZERO, 1, + { "lbadd", TILEPRO_OPC_LBADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lbadd.sn", TILE_OPC_LBADD_SN, 0x2, 3, TREG_SN, 1, + { "lbadd.sn", TILEPRO_OPC_LBADD_SN, 
0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lbadd_u", TILE_OPC_LBADD_U, 0x2, 3, TREG_ZERO, 1, + { "lbadd_u", TILEPRO_OPC_LBADD_U, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lbadd_u.sn", TILE_OPC_LBADD_U_SN, 0x2, 3, TREG_SN, 1, + { "lbadd_u.sn", TILEPRO_OPC_LBADD_U_SN, 0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lh", TILE_OPC_LH, 0x12, 2, TREG_ZERO, 1, + { "lh", TILEPRO_OPC_LH, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, }, - { "lh.sn", TILE_OPC_LH_SN, 0x2, 2, TREG_SN, 1, + { "lh.sn", TILEPRO_OPC_LH_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lh_u", TILE_OPC_LH_U, 0x12, 2, TREG_ZERO, 1, + { "lh_u", TILEPRO_OPC_LH_U, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, }, - { "lh_u.sn", TILE_OPC_LH_U_SN, 0x2, 2, TREG_SN, 1, + { "lh_u.sn", TILEPRO_OPC_LH_U_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lhadd", TILE_OPC_LHADD, 0x2, 3, TREG_ZERO, 1, + { "lhadd", TILEPRO_OPC_LHADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lhadd.sn", TILE_OPC_LHADD_SN, 0x2, 3, TREG_SN, 1, + { "lhadd.sn", TILEPRO_OPC_LHADD_SN, 0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lhadd_u", TILE_OPC_LHADD_U, 0x2, 3, TREG_ZERO, 1, + { "lhadd_u", TILEPRO_OPC_LHADD_U, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lhadd_u.sn", TILE_OPC_LHADD_U_SN, 0x2, 3, TREG_SN, 1, + { "lhadd_u.sn", TILEPRO_OPC_LHADD_U_SN, 0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lnk", TILE_OPC_LNK, 0x2, 1, TREG_ZERO, 1, + { "lnk", TILEPRO_OPC_LNK, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, }, - { "lnk.sn", TILE_OPC_LNK_SN, 0x2, 1, TREG_SN, 1, + { "lnk.sn", TILEPRO_OPC_LNK_SN, 0x2, 1, TREG_SN, 1, { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, }, - { "lw", 
TILE_OPC_LW, 0x12, 2, TREG_ZERO, 1, + { "lw", TILEPRO_OPC_LW, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 23, 15 } }, }, - { "lw.sn", TILE_OPC_LW_SN, 0x2, 2, TREG_SN, 1, + { "lw.sn", TILEPRO_OPC_LW_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lw_na", TILE_OPC_LW_NA, 0x2, 2, TREG_ZERO, 1, + { "lw_na", TILEPRO_OPC_LW_NA, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lw_na.sn", TILE_OPC_LW_NA_SN, 0x2, 2, TREG_SN, 1, + { "lw_na.sn", TILEPRO_OPC_LW_NA_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "lwadd", TILE_OPC_LWADD, 0x2, 3, TREG_ZERO, 1, + { "lwadd", TILEPRO_OPC_LWADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lwadd.sn", TILE_OPC_LWADD_SN, 0x2, 3, TREG_SN, 1, + { "lwadd.sn", TILEPRO_OPC_LWADD_SN, 0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lwadd_na", TILE_OPC_LWADD_NA, 0x2, 3, TREG_ZERO, 1, + { "lwadd_na", TILEPRO_OPC_LWADD_NA, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "lwadd_na.sn", TILE_OPC_LWADD_NA_SN, 0x2, 3, TREG_SN, 1, + { "lwadd_na.sn", TILEPRO_OPC_LWADD_NA_SN, 0x2, 3, TREG_SN, 1, { { 0, }, { 9, 24, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "maxb_u", TILE_OPC_MAXB_U, 0x3, 3, TREG_ZERO, 1, + { "maxb_u", TILEPRO_OPC_MAXB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "maxb_u.sn", TILE_OPC_MAXB_U_SN, 0x3, 3, TREG_SN, 1, + { "maxb_u.sn", TILEPRO_OPC_MAXB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "maxh", TILE_OPC_MAXH, 0x3, 3, TREG_ZERO, 1, + { "maxh", TILEPRO_OPC_MAXH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "maxh.sn", TILE_OPC_MAXH_SN, 0x3, 3, TREG_SN, 1, + { "maxh.sn", TILEPRO_OPC_MAXH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "maxib_u", TILE_OPC_MAXIB_U, 0x3, 3, 
TREG_ZERO, 1, + { "maxib_u", TILEPRO_OPC_MAXIB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "maxib_u.sn", TILE_OPC_MAXIB_U_SN, 0x3, 3, TREG_SN, 1, + { "maxib_u.sn", TILEPRO_OPC_MAXIB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "maxih", TILE_OPC_MAXIH, 0x3, 3, TREG_ZERO, 1, + { "maxih", TILEPRO_OPC_MAXIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "maxih.sn", TILE_OPC_MAXIH_SN, 0x3, 3, TREG_SN, 1, + { "maxih.sn", TILEPRO_OPC_MAXIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "mf", TILE_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { "mf", TILEPRO_OPC_MF, 0x2, 0, TREG_ZERO, 1, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "mfspr", TILE_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { "mfspr", TILEPRO_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 9, 25 }, { 0, }, { 0, }, { 0, } }, }, - { "minb_u", TILE_OPC_MINB_U, 0x3, 3, TREG_ZERO, 1, + { "minb_u", TILEPRO_OPC_MINB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "minb_u.sn", TILE_OPC_MINB_U_SN, 0x3, 3, TREG_SN, 1, + { "minb_u.sn", TILEPRO_OPC_MINB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "minh", TILE_OPC_MINH, 0x3, 3, TREG_ZERO, 1, + { "minh", TILEPRO_OPC_MINH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "minh.sn", TILE_OPC_MINH_SN, 0x3, 3, TREG_SN, 1, + { "minh.sn", TILEPRO_OPC_MINH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "minib_u", TILE_OPC_MINIB_U, 0x3, 3, TREG_ZERO, 1, + { "minib_u", TILEPRO_OPC_MINIB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "minib_u.sn", TILE_OPC_MINIB_U_SN, 0x3, 3, TREG_SN, 1, + { "minib_u.sn", TILEPRO_OPC_MINIB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "minih", TILE_OPC_MINIH, 0x3, 3, 
TREG_ZERO, 1, + { "minih", TILEPRO_OPC_MINIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "minih.sn", TILE_OPC_MINIH_SN, 0x3, 3, TREG_SN, 1, + { "minih.sn", TILEPRO_OPC_MINIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "mm", TILE_OPC_MM, 0x3, 5, TREG_ZERO, 1, + { "mm", TILEPRO_OPC_MM, 0x3, 5, TREG_ZERO, 1, { { 7, 8, 16, 26, 27 }, { 9, 10, 17, 28, 29 }, { 0, }, { 0, }, { 0, } }, }, - { "mnz", TILE_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { "mnz", TILEPRO_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "mnz.sn", TILE_OPC_MNZ_SN, 0x3, 3, TREG_SN, 1, + { "mnz.sn", TILEPRO_OPC_MNZ_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mnzb", TILE_OPC_MNZB, 0x3, 3, TREG_ZERO, 1, + { "mnzb", TILEPRO_OPC_MNZB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mnzb.sn", TILE_OPC_MNZB_SN, 0x3, 3, TREG_SN, 1, + { "mnzb.sn", TILEPRO_OPC_MNZB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mnzh", TILE_OPC_MNZH, 0x3, 3, TREG_ZERO, 1, + { "mnzh", TILEPRO_OPC_MNZH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mnzh.sn", TILE_OPC_MNZH_SN, 0x3, 3, TREG_SN, 1, + { "mnzh.sn", TILEPRO_OPC_MNZH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mtspr", TILE_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { "mtspr", TILEPRO_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 30, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "mulhh_ss", TILE_OPC_MULHH_SS, 0x5, 3, TREG_ZERO, 1, + { "mulhh_ss", TILEPRO_OPC_MULHH_SS, 0x5, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, }, - { "mulhh_ss.sn", TILE_OPC_MULHH_SS_SN, 0x1, 3, TREG_SN, 1, + { "mulhh_ss.sn", TILEPRO_OPC_MULHH_SS_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhh_su", 
TILE_OPC_MULHH_SU, 0x1, 3, TREG_ZERO, 1, + { "mulhh_su", TILEPRO_OPC_MULHH_SU, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhh_su.sn", TILE_OPC_MULHH_SU_SN, 0x1, 3, TREG_SN, 1, + { "mulhh_su.sn", TILEPRO_OPC_MULHH_SU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhh_uu", TILE_OPC_MULHH_UU, 0x5, 3, TREG_ZERO, 1, + { "mulhh_uu", TILEPRO_OPC_MULHH_UU, 0x5, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, }, - { "mulhh_uu.sn", TILE_OPC_MULHH_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhh_uu.sn", TILEPRO_OPC_MULHH_UU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhha_ss", TILE_OPC_MULHHA_SS, 0x5, 3, TREG_ZERO, 1, + { "mulhha_ss", TILEPRO_OPC_MULHHA_SS, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mulhha_ss.sn", TILE_OPC_MULHHA_SS_SN, 0x1, 3, TREG_SN, 1, + { "mulhha_ss.sn", TILEPRO_OPC_MULHHA_SS_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhha_su", TILE_OPC_MULHHA_SU, 0x1, 3, TREG_ZERO, 1, + { "mulhha_su", TILEPRO_OPC_MULHHA_SU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhha_su.sn", TILE_OPC_MULHHA_SU_SN, 0x1, 3, TREG_SN, 1, + { "mulhha_su.sn", TILEPRO_OPC_MULHHA_SU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhha_uu", TILE_OPC_MULHHA_UU, 0x5, 3, TREG_ZERO, 1, + { "mulhha_uu", TILEPRO_OPC_MULHHA_UU, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mulhha_uu.sn", TILE_OPC_MULHHA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhha_uu.sn", TILEPRO_OPC_MULHHA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhhsa_uu", TILE_OPC_MULHHSA_UU, 0x1, 3, TREG_ZERO, 1, + { "mulhhsa_uu", TILEPRO_OPC_MULHHSA_UU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhhsa_uu.sn", 
TILE_OPC_MULHHSA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhhsa_uu.sn", TILEPRO_OPC_MULHHSA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_ss", TILE_OPC_MULHL_SS, 0x1, 3, TREG_ZERO, 1, + { "mulhl_ss", TILEPRO_OPC_MULHL_SS, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_ss.sn", TILE_OPC_MULHL_SS_SN, 0x1, 3, TREG_SN, 1, + { "mulhl_ss.sn", TILEPRO_OPC_MULHL_SS_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_su", TILE_OPC_MULHL_SU, 0x1, 3, TREG_ZERO, 1, + { "mulhl_su", TILEPRO_OPC_MULHL_SU, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_su.sn", TILE_OPC_MULHL_SU_SN, 0x1, 3, TREG_SN, 1, + { "mulhl_su.sn", TILEPRO_OPC_MULHL_SU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_us", TILE_OPC_MULHL_US, 0x1, 3, TREG_ZERO, 1, + { "mulhl_us", TILEPRO_OPC_MULHL_US, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_us.sn", TILE_OPC_MULHL_US_SN, 0x1, 3, TREG_SN, 1, + { "mulhl_us.sn", TILEPRO_OPC_MULHL_US_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_uu", TILE_OPC_MULHL_UU, 0x1, 3, TREG_ZERO, 1, + { "mulhl_uu", TILEPRO_OPC_MULHL_UU, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhl_uu.sn", TILE_OPC_MULHL_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhl_uu.sn", TILEPRO_OPC_MULHL_UU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_ss", TILE_OPC_MULHLA_SS, 0x1, 3, TREG_ZERO, 1, + { "mulhla_ss", TILEPRO_OPC_MULHLA_SS, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_ss.sn", TILE_OPC_MULHLA_SS_SN, 0x1, 3, TREG_SN, 1, + { "mulhla_ss.sn", TILEPRO_OPC_MULHLA_SS_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_su", TILE_OPC_MULHLA_SU, 0x1, 3, TREG_ZERO, 1, + { "mulhla_su", 
TILEPRO_OPC_MULHLA_SU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_su.sn", TILE_OPC_MULHLA_SU_SN, 0x1, 3, TREG_SN, 1, + { "mulhla_su.sn", TILEPRO_OPC_MULHLA_SU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_us", TILE_OPC_MULHLA_US, 0x1, 3, TREG_ZERO, 1, + { "mulhla_us", TILEPRO_OPC_MULHLA_US, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_us.sn", TILE_OPC_MULHLA_US_SN, 0x1, 3, TREG_SN, 1, + { "mulhla_us.sn", TILEPRO_OPC_MULHLA_US_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_uu", TILE_OPC_MULHLA_UU, 0x1, 3, TREG_ZERO, 1, + { "mulhla_uu", TILEPRO_OPC_MULHLA_UU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhla_uu.sn", TILE_OPC_MULHLA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhla_uu.sn", TILEPRO_OPC_MULHLA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulhlsa_uu", TILE_OPC_MULHLSA_UU, 0x5, 3, TREG_ZERO, 1, + { "mulhlsa_uu", TILEPRO_OPC_MULHLSA_UU, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mulhlsa_uu.sn", TILE_OPC_MULHLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulhlsa_uu.sn", TILEPRO_OPC_MULHLSA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulll_ss", TILE_OPC_MULLL_SS, 0x5, 3, TREG_ZERO, 1, + { "mulll_ss", TILEPRO_OPC_MULLL_SS, 0x5, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, }, - { "mulll_ss.sn", TILE_OPC_MULLL_SS_SN, 0x1, 3, TREG_SN, 1, + { "mulll_ss.sn", TILEPRO_OPC_MULLL_SS_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulll_su", TILE_OPC_MULLL_SU, 0x1, 3, TREG_ZERO, 1, + { "mulll_su", TILEPRO_OPC_MULLL_SU, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulll_su.sn", TILE_OPC_MULLL_SU_SN, 0x1, 3, TREG_SN, 1, + { "mulll_su.sn", 
TILEPRO_OPC_MULLL_SU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulll_uu", TILE_OPC_MULLL_UU, 0x5, 3, TREG_ZERO, 1, + { "mulll_uu", TILEPRO_OPC_MULLL_UU, 0x5, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 11, 12, 18 }, { 0, }, { 0, } }, }, - { "mulll_uu.sn", TILE_OPC_MULLL_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulll_uu.sn", TILEPRO_OPC_MULLL_UU_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mullla_ss", TILE_OPC_MULLLA_SS, 0x5, 3, TREG_ZERO, 1, + { "mullla_ss", TILEPRO_OPC_MULLLA_SS, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mullla_ss.sn", TILE_OPC_MULLLA_SS_SN, 0x1, 3, TREG_SN, 1, + { "mullla_ss.sn", TILEPRO_OPC_MULLLA_SS_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mullla_su", TILE_OPC_MULLLA_SU, 0x1, 3, TREG_ZERO, 1, + { "mullla_su", TILEPRO_OPC_MULLLA_SU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mullla_su.sn", TILE_OPC_MULLLA_SU_SN, 0x1, 3, TREG_SN, 1, + { "mullla_su.sn", TILEPRO_OPC_MULLLA_SU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mullla_uu", TILE_OPC_MULLLA_UU, 0x5, 3, TREG_ZERO, 1, + { "mullla_uu", TILEPRO_OPC_MULLLA_UU, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mullla_uu.sn", TILE_OPC_MULLLA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mullla_uu.sn", TILEPRO_OPC_MULLLA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulllsa_uu", TILE_OPC_MULLLSA_UU, 0x1, 3, TREG_ZERO, 1, + { "mulllsa_uu", TILEPRO_OPC_MULLLSA_UU, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mulllsa_uu.sn", TILE_OPC_MULLLSA_UU_SN, 0x1, 3, TREG_SN, 1, + { "mulllsa_uu.sn", TILEPRO_OPC_MULLLSA_UU_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mvnz", TILE_OPC_MVNZ, 0x5, 3, TREG_ZERO, 1, + { "mvnz", TILEPRO_OPC_MVNZ, 0x5, 3, 
TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mvnz.sn", TILE_OPC_MVNZ_SN, 0x1, 3, TREG_SN, 1, + { "mvnz.sn", TILEPRO_OPC_MVNZ_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mvz", TILE_OPC_MVZ, 0x5, 3, TREG_ZERO, 1, + { "mvz", TILEPRO_OPC_MVZ, 0x5, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 31, 12, 18 }, { 0, }, { 0, } }, }, - { "mvz.sn", TILE_OPC_MVZ_SN, 0x1, 3, TREG_SN, 1, + { "mvz.sn", TILEPRO_OPC_MVZ_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "mz", TILE_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { "mz", TILEPRO_OPC_MZ, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "mz.sn", TILE_OPC_MZ_SN, 0x3, 3, TREG_SN, 1, + { "mz.sn", TILEPRO_OPC_MZ_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mzb", TILE_OPC_MZB, 0x3, 3, TREG_ZERO, 1, + { "mzb", TILEPRO_OPC_MZB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mzb.sn", TILE_OPC_MZB_SN, 0x3, 3, TREG_SN, 1, + { "mzb.sn", TILEPRO_OPC_MZB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mzh", TILE_OPC_MZH, 0x3, 3, TREG_ZERO, 1, + { "mzh", TILEPRO_OPC_MZH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "mzh.sn", TILE_OPC_MZH_SN, 0x3, 3, TREG_SN, 1, + { "mzh.sn", TILEPRO_OPC_MZH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "nap", TILE_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { "nap", TILEPRO_OPC_NAP, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "nop", TILE_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { "nop", TILEPRO_OPC_NOP, 0xf, 0, TREG_ZERO, 1, { { }, { }, { }, { }, { 0, } }, }, - { "nor", TILE_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { "nor", TILEPRO_OPC_NOR, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "nor.sn", 
TILE_OPC_NOR_SN, 0x3, 3, TREG_SN, 1, + { "nor.sn", TILEPRO_OPC_NOR_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "or", TILE_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { "or", TILEPRO_OPC_OR, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "or.sn", TILE_OPC_OR_SN, 0x3, 3, TREG_SN, 1, + { "or.sn", TILEPRO_OPC_OR_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "ori", TILE_OPC_ORI, 0xf, 3, TREG_ZERO, 1, + { "ori", TILEPRO_OPC_ORI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "ori.sn", TILE_OPC_ORI_SN, 0x3, 3, TREG_SN, 1, + { "ori.sn", TILEPRO_OPC_ORI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "packbs_u", TILE_OPC_PACKBS_U, 0x3, 3, TREG_ZERO, 1, + { "packbs_u", TILEPRO_OPC_PACKBS_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packbs_u.sn", TILE_OPC_PACKBS_U_SN, 0x3, 3, TREG_SN, 1, + { "packbs_u.sn", TILEPRO_OPC_PACKBS_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packhb", TILE_OPC_PACKHB, 0x3, 3, TREG_ZERO, 1, + { "packhb", TILEPRO_OPC_PACKHB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packhb.sn", TILE_OPC_PACKHB_SN, 0x3, 3, TREG_SN, 1, + { "packhb.sn", TILEPRO_OPC_PACKHB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packhs", TILE_OPC_PACKHS, 0x3, 3, TREG_ZERO, 1, + { "packhs", TILEPRO_OPC_PACKHS, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packhs.sn", TILE_OPC_PACKHS_SN, 0x3, 3, TREG_SN, 1, + { "packhs.sn", TILEPRO_OPC_PACKHS_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packlb", TILE_OPC_PACKLB, 0x3, 3, TREG_ZERO, 1, + { "packlb", TILEPRO_OPC_PACKLB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 
10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "packlb.sn", TILE_OPC_PACKLB_SN, 0x3, 3, TREG_SN, 1, + { "packlb.sn", TILEPRO_OPC_PACKLB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "pcnt", TILE_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { "pcnt", TILEPRO_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, { { 7, 8 }, { 0, }, { 11, 12 }, { 0, }, { 0, } }, }, - { "pcnt.sn", TILE_OPC_PCNT_SN, 0x1, 2, TREG_SN, 1, + { "pcnt.sn", TILEPRO_OPC_PCNT_SN, 0x1, 2, TREG_SN, 1, { { 7, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "rl", TILE_OPC_RL, 0xf, 3, TREG_ZERO, 1, + { "rl", TILEPRO_OPC_RL, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "rl.sn", TILE_OPC_RL_SN, 0x3, 3, TREG_SN, 1, + { "rl.sn", TILEPRO_OPC_RL_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "rli", TILE_OPC_RLI, 0xf, 3, TREG_ZERO, 1, + { "rli", TILEPRO_OPC_RLI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, }, - { "rli.sn", TILE_OPC_RLI_SN, 0x3, 3, TREG_SN, 1, + { "rli.sn", TILEPRO_OPC_RLI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "s1a", TILE_OPC_S1A, 0xf, 3, TREG_ZERO, 1, + { "s1a", TILEPRO_OPC_S1A, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "s1a.sn", TILE_OPC_S1A_SN, 0x3, 3, TREG_SN, 1, + { "s1a.sn", TILEPRO_OPC_S1A_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "s2a", TILE_OPC_S2A, 0xf, 3, TREG_ZERO, 1, + { "s2a", TILEPRO_OPC_S2A, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "s2a.sn", TILE_OPC_S2A_SN, 0x3, 3, TREG_SN, 1, + { "s2a.sn", TILEPRO_OPC_S2A_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "s3a", TILE_OPC_S3A, 0xf, 3, TREG_ZERO, 1, + { "s3a", TILEPRO_OPC_S3A, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, 
{ 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "s3a.sn", TILE_OPC_S3A_SN, 0x3, 3, TREG_SN, 1, + { "s3a.sn", TILEPRO_OPC_S3A_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sadab_u", TILE_OPC_SADAB_U, 0x1, 3, TREG_ZERO, 1, + { "sadab_u", TILEPRO_OPC_SADAB_U, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadab_u.sn", TILE_OPC_SADAB_U_SN, 0x1, 3, TREG_SN, 1, + { "sadab_u.sn", TILEPRO_OPC_SADAB_U_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadah", TILE_OPC_SADAH, 0x1, 3, TREG_ZERO, 1, + { "sadah", TILEPRO_OPC_SADAH, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadah.sn", TILE_OPC_SADAH_SN, 0x1, 3, TREG_SN, 1, + { "sadah.sn", TILEPRO_OPC_SADAH_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadah_u", TILE_OPC_SADAH_U, 0x1, 3, TREG_ZERO, 1, + { "sadah_u", TILEPRO_OPC_SADAH_U, 0x1, 3, TREG_ZERO, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadah_u.sn", TILE_OPC_SADAH_U_SN, 0x1, 3, TREG_SN, 1, + { "sadah_u.sn", TILEPRO_OPC_SADAH_U_SN, 0x1, 3, TREG_SN, 1, { { 21, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadb_u", TILE_OPC_SADB_U, 0x1, 3, TREG_ZERO, 1, + { "sadb_u", TILEPRO_OPC_SADB_U, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadb_u.sn", TILE_OPC_SADB_U_SN, 0x1, 3, TREG_SN, 1, + { "sadb_u.sn", TILEPRO_OPC_SADB_U_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadh", TILE_OPC_SADH, 0x1, 3, TREG_ZERO, 1, + { "sadh", TILEPRO_OPC_SADH, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadh.sn", TILE_OPC_SADH_SN, 0x1, 3, TREG_SN, 1, + { "sadh.sn", TILEPRO_OPC_SADH_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sadh_u", TILE_OPC_SADH_U, 0x1, 3, TREG_ZERO, 1, + { "sadh_u", TILEPRO_OPC_SADH_U, 0x1, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 0, 
}, { 0, }, { 0, }, { 0, } }, }, - { "sadh_u.sn", TILE_OPC_SADH_U_SN, 0x1, 3, TREG_SN, 1, + { "sadh_u.sn", TILEPRO_OPC_SADH_U_SN, 0x1, 3, TREG_SN, 1, { { 7, 8, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "sb", TILE_OPC_SB, 0x12, 2, TREG_ZERO, 1, + { "sb", TILEPRO_OPC_SB, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, }, - { "sbadd", TILE_OPC_SBADD, 0x2, 3, TREG_ZERO, 1, + { "sbadd", TILEPRO_OPC_SBADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, }, - { "seq", TILE_OPC_SEQ, 0xf, 3, TREG_ZERO, 1, + { "seq", TILEPRO_OPC_SEQ, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "seq.sn", TILE_OPC_SEQ_SN, 0x3, 3, TREG_SN, 1, + { "seq.sn", TILEPRO_OPC_SEQ_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "seqb", TILE_OPC_SEQB, 0x3, 3, TREG_ZERO, 1, + { "seqb", TILEPRO_OPC_SEQB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "seqb.sn", TILE_OPC_SEQB_SN, 0x3, 3, TREG_SN, 1, + { "seqb.sn", TILEPRO_OPC_SEQB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "seqh", TILE_OPC_SEQH, 0x3, 3, TREG_ZERO, 1, + { "seqh", TILEPRO_OPC_SEQH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "seqh.sn", TILE_OPC_SEQH_SN, 0x3, 3, TREG_SN, 1, + { "seqh.sn", TILEPRO_OPC_SEQH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "seqi", TILE_OPC_SEQI, 0xf, 3, TREG_ZERO, 1, + { "seqi", TILEPRO_OPC_SEQI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "seqi.sn", TILE_OPC_SEQI_SN, 0x3, 3, TREG_SN, 1, + { "seqi.sn", TILEPRO_OPC_SEQI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "seqib", TILE_OPC_SEQIB, 0x3, 3, TREG_ZERO, 1, + { "seqib", TILEPRO_OPC_SEQIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, 
{ 0, } }, }, - { "seqib.sn", TILE_OPC_SEQIB_SN, 0x3, 3, TREG_SN, 1, + { "seqib.sn", TILEPRO_OPC_SEQIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "seqih", TILE_OPC_SEQIH, 0x3, 3, TREG_ZERO, 1, + { "seqih", TILEPRO_OPC_SEQIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "seqih.sn", TILE_OPC_SEQIH_SN, 0x3, 3, TREG_SN, 1, + { "seqih.sn", TILEPRO_OPC_SEQIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sh", TILE_OPC_SH, 0x12, 2, TREG_ZERO, 1, + { "sh", TILEPRO_OPC_SH, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, }, - { "shadd", TILE_OPC_SHADD, 0x2, 3, TREG_ZERO, 1, + { "shadd", TILEPRO_OPC_SHADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, }, - { "shl", TILE_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { "shl", TILEPRO_OPC_SHL, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "shl.sn", TILE_OPC_SHL_SN, 0x3, 3, TREG_SN, 1, + { "shl.sn", TILEPRO_OPC_SHL_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shlb", TILE_OPC_SHLB, 0x3, 3, TREG_ZERO, 1, + { "shlb", TILEPRO_OPC_SHLB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shlb.sn", TILE_OPC_SHLB_SN, 0x3, 3, TREG_SN, 1, + { "shlb.sn", TILEPRO_OPC_SHLB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shlh", TILE_OPC_SHLH, 0x3, 3, TREG_ZERO, 1, + { "shlh", TILEPRO_OPC_SHLH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shlh.sn", TILE_OPC_SHLH_SN, 0x3, 3, TREG_SN, 1, + { "shlh.sn", TILEPRO_OPC_SHLH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shli", TILE_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { "shli", TILEPRO_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, 
}, - { "shli.sn", TILE_OPC_SHLI_SN, 0x3, 3, TREG_SN, 1, + { "shli.sn", TILEPRO_OPC_SHLI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shlib", TILE_OPC_SHLIB, 0x3, 3, TREG_ZERO, 1, + { "shlib", TILEPRO_OPC_SHLIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shlib.sn", TILE_OPC_SHLIB_SN, 0x3, 3, TREG_SN, 1, + { "shlib.sn", TILEPRO_OPC_SHLIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shlih", TILE_OPC_SHLIH, 0x3, 3, TREG_ZERO, 1, + { "shlih", TILEPRO_OPC_SHLIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shlih.sn", TILE_OPC_SHLIH_SN, 0x3, 3, TREG_SN, 1, + { "shlih.sn", TILEPRO_OPC_SHLIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shr", TILE_OPC_SHR, 0xf, 3, TREG_ZERO, 1, + { "shr", TILEPRO_OPC_SHR, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "shr.sn", TILE_OPC_SHR_SN, 0x3, 3, TREG_SN, 1, + { "shr.sn", TILEPRO_OPC_SHR_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shrb", TILE_OPC_SHRB, 0x3, 3, TREG_ZERO, 1, + { "shrb", TILEPRO_OPC_SHRB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shrb.sn", TILE_OPC_SHRB_SN, 0x3, 3, TREG_SN, 1, + { "shrb.sn", TILEPRO_OPC_SHRB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shrh", TILE_OPC_SHRH, 0x3, 3, TREG_ZERO, 1, + { "shrh", TILEPRO_OPC_SHRH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shrh.sn", TILE_OPC_SHRH_SN, 0x3, 3, TREG_SN, 1, + { "shrh.sn", TILEPRO_OPC_SHRH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "shri", TILE_OPC_SHRI, 0xf, 3, TREG_ZERO, 1, + { "shri", TILEPRO_OPC_SHRI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 
14, 35 }, { 0, } }, }, - { "shri.sn", TILE_OPC_SHRI_SN, 0x3, 3, TREG_SN, 1, + { "shri.sn", TILEPRO_OPC_SHRI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shrib", TILE_OPC_SHRIB, 0x3, 3, TREG_ZERO, 1, + { "shrib", TILEPRO_OPC_SHRIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shrib.sn", TILE_OPC_SHRIB_SN, 0x3, 3, TREG_SN, 1, + { "shrib.sn", TILEPRO_OPC_SHRIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shrih", TILE_OPC_SHRIH, 0x3, 3, TREG_ZERO, 1, + { "shrih", TILEPRO_OPC_SHRIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "shrih.sn", TILE_OPC_SHRIH_SN, 0x3, 3, TREG_SN, 1, + { "shrih.sn", TILEPRO_OPC_SHRIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "slt", TILE_OPC_SLT, 0xf, 3, TREG_ZERO, 1, + { "slt", TILEPRO_OPC_SLT, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "slt.sn", TILE_OPC_SLT_SN, 0x3, 3, TREG_SN, 1, + { "slt.sn", TILEPRO_OPC_SLT_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slt_u", TILE_OPC_SLT_U, 0xf, 3, TREG_ZERO, 1, + { "slt_u", TILEPRO_OPC_SLT_U, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "slt_u.sn", TILE_OPC_SLT_U_SN, 0x3, 3, TREG_SN, 1, + { "slt_u.sn", TILEPRO_OPC_SLT_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sltb", TILE_OPC_SLTB, 0x3, 3, TREG_ZERO, 1, + { "sltb", TILEPRO_OPC_SLTB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sltb.sn", TILE_OPC_SLTB_SN, 0x3, 3, TREG_SN, 1, + { "sltb.sn", TILEPRO_OPC_SLTB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sltb_u", TILE_OPC_SLTB_U, 0x3, 3, TREG_ZERO, 1, + { "sltb_u", TILEPRO_OPC_SLTB_U, 0x3, 3, TREG_ZERO, 1, 
{ { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sltb_u.sn", TILE_OPC_SLTB_U_SN, 0x3, 3, TREG_SN, 1, + { "sltb_u.sn", TILEPRO_OPC_SLTB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slte", TILE_OPC_SLTE, 0xf, 3, TREG_ZERO, 1, + { "slte", TILEPRO_OPC_SLTE, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "slte.sn", TILE_OPC_SLTE_SN, 0x3, 3, TREG_SN, 1, + { "slte.sn", TILEPRO_OPC_SLTE_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slte_u", TILE_OPC_SLTE_U, 0xf, 3, TREG_ZERO, 1, + { "slte_u", TILEPRO_OPC_SLTE_U, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "slte_u.sn", TILE_OPC_SLTE_U_SN, 0x3, 3, TREG_SN, 1, + { "slte_u.sn", TILEPRO_OPC_SLTE_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteb", TILE_OPC_SLTEB, 0x3, 3, TREG_ZERO, 1, + { "slteb", TILEPRO_OPC_SLTEB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteb.sn", TILE_OPC_SLTEB_SN, 0x3, 3, TREG_SN, 1, + { "slteb.sn", TILEPRO_OPC_SLTEB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteb_u", TILE_OPC_SLTEB_U, 0x3, 3, TREG_ZERO, 1, + { "slteb_u", TILEPRO_OPC_SLTEB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteb_u.sn", TILE_OPC_SLTEB_U_SN, 0x3, 3, TREG_SN, 1, + { "slteb_u.sn", TILEPRO_OPC_SLTEB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteh", TILE_OPC_SLTEH, 0x3, 3, TREG_ZERO, 1, + { "slteh", TILEPRO_OPC_SLTEH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteh.sn", TILE_OPC_SLTEH_SN, 0x3, 3, TREG_SN, 1, + { "slteh.sn", TILEPRO_OPC_SLTEH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteh_u", 
TILE_OPC_SLTEH_U, 0x3, 3, TREG_ZERO, 1, + { "slteh_u", TILEPRO_OPC_SLTEH_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slteh_u.sn", TILE_OPC_SLTEH_U_SN, 0x3, 3, TREG_SN, 1, + { "slteh_u.sn", TILEPRO_OPC_SLTEH_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slth", TILE_OPC_SLTH, 0x3, 3, TREG_ZERO, 1, + { "slth", TILEPRO_OPC_SLTH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slth.sn", TILE_OPC_SLTH_SN, 0x3, 3, TREG_SN, 1, + { "slth.sn", TILEPRO_OPC_SLTH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slth_u", TILE_OPC_SLTH_U, 0x3, 3, TREG_ZERO, 1, + { "slth_u", TILEPRO_OPC_SLTH_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slth_u.sn", TILE_OPC_SLTH_U_SN, 0x3, 3, TREG_SN, 1, + { "slth_u.sn", TILEPRO_OPC_SLTH_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "slti", TILE_OPC_SLTI, 0xf, 3, TREG_ZERO, 1, + { "slti", TILEPRO_OPC_SLTI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "slti.sn", TILE_OPC_SLTI_SN, 0x3, 3, TREG_SN, 1, + { "slti.sn", TILEPRO_OPC_SLTI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "slti_u", TILE_OPC_SLTI_U, 0xf, 3, TREG_ZERO, 1, + { "slti_u", TILEPRO_OPC_SLTI_U, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 11, 12, 2 }, { 13, 14, 3 }, { 0, } }, }, - { "slti_u.sn", TILE_OPC_SLTI_U_SN, 0x3, 3, TREG_SN, 1, + { "slti_u.sn", TILEPRO_OPC_SLTI_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltib", TILE_OPC_SLTIB, 0x3, 3, TREG_ZERO, 1, + { "sltib", TILEPRO_OPC_SLTIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltib.sn", TILE_OPC_SLTIB_SN, 0x3, 3, TREG_SN, 1, + { "sltib.sn", TILEPRO_OPC_SLTIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 
}, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltib_u", TILE_OPC_SLTIB_U, 0x3, 3, TREG_ZERO, 1, + { "sltib_u", TILEPRO_OPC_SLTIB_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltib_u.sn", TILE_OPC_SLTIB_U_SN, 0x3, 3, TREG_SN, 1, + { "sltib_u.sn", TILEPRO_OPC_SLTIB_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltih", TILE_OPC_SLTIH, 0x3, 3, TREG_ZERO, 1, + { "sltih", TILEPRO_OPC_SLTIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltih.sn", TILE_OPC_SLTIH_SN, 0x3, 3, TREG_SN, 1, + { "sltih.sn", TILEPRO_OPC_SLTIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltih_u", TILE_OPC_SLTIH_U, 0x3, 3, TREG_ZERO, 1, + { "sltih_u", TILEPRO_OPC_SLTIH_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sltih_u.sn", TILE_OPC_SLTIH_U_SN, 0x3, 3, TREG_SN, 1, + { "sltih_u.sn", TILEPRO_OPC_SLTIH_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "sne", TILE_OPC_SNE, 0xf, 3, TREG_ZERO, 1, + { "sne", TILEPRO_OPC_SNE, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "sne.sn", TILE_OPC_SNE_SN, 0x3, 3, TREG_SN, 1, + { "sne.sn", TILEPRO_OPC_SNE_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sneb", TILE_OPC_SNEB, 0x3, 3, TREG_ZERO, 1, + { "sneb", TILEPRO_OPC_SNEB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sneb.sn", TILE_OPC_SNEB_SN, 0x3, 3, TREG_SN, 1, + { "sneb.sn", TILEPRO_OPC_SNEB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sneh", TILE_OPC_SNEH, 0x3, 3, TREG_ZERO, 1, + { "sneh", TILEPRO_OPC_SNEH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sneh.sn", TILE_OPC_SNEH_SN, 0x3, 3, TREG_SN, 1, + { "sneh.sn", TILEPRO_OPC_SNEH_SN, 
0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sra", TILE_OPC_SRA, 0xf, 3, TREG_ZERO, 1, + { "sra", TILEPRO_OPC_SRA, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "sra.sn", TILE_OPC_SRA_SN, 0x3, 3, TREG_SN, 1, + { "sra.sn", TILEPRO_OPC_SRA_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "srab", TILE_OPC_SRAB, 0x3, 3, TREG_ZERO, 1, + { "srab", TILEPRO_OPC_SRAB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "srab.sn", TILE_OPC_SRAB_SN, 0x3, 3, TREG_SN, 1, + { "srab.sn", TILEPRO_OPC_SRAB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "srah", TILE_OPC_SRAH, 0x3, 3, TREG_ZERO, 1, + { "srah", TILEPRO_OPC_SRAH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "srah.sn", TILE_OPC_SRAH_SN, 0x3, 3, TREG_SN, 1, + { "srah.sn", TILEPRO_OPC_SRAH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "srai", TILE_OPC_SRAI, 0xf, 3, TREG_ZERO, 1, + { "srai", TILEPRO_OPC_SRAI, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 11, 12, 34 }, { 13, 14, 35 }, { 0, } }, }, - { "srai.sn", TILE_OPC_SRAI_SN, 0x3, 3, TREG_SN, 1, + { "srai.sn", TILEPRO_OPC_SRAI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "sraib", TILE_OPC_SRAIB, 0x3, 3, TREG_ZERO, 1, + { "sraib", TILEPRO_OPC_SRAIB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "sraib.sn", TILE_OPC_SRAIB_SN, 0x3, 3, TREG_SN, 1, + { "sraib.sn", TILEPRO_OPC_SRAIB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "sraih", TILE_OPC_SRAIH, 0x3, 3, TREG_ZERO, 1, + { "sraih", TILEPRO_OPC_SRAIH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "sraih.sn", TILE_OPC_SRAIH_SN, 0x3, 3, TREG_SN, 1, + { "sraih.sn", 
TILEPRO_OPC_SRAIH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 32 }, { 9, 10, 33 }, { 0, }, { 0, }, { 0, } }, }, - { "sub", TILE_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { "sub", TILEPRO_OPC_SUB, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "sub.sn", TILE_OPC_SUB_SN, 0x3, 3, TREG_SN, 1, + { "sub.sn", TILEPRO_OPC_SUB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subb", TILE_OPC_SUBB, 0x3, 3, TREG_ZERO, 1, + { "subb", TILEPRO_OPC_SUBB, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subb.sn", TILE_OPC_SUBB_SN, 0x3, 3, TREG_SN, 1, + { "subb.sn", TILEPRO_OPC_SUBB_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subbs_u", TILE_OPC_SUBBS_U, 0x3, 3, TREG_ZERO, 1, + { "subbs_u", TILEPRO_OPC_SUBBS_U, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subbs_u.sn", TILE_OPC_SUBBS_U_SN, 0x3, 3, TREG_SN, 1, + { "subbs_u.sn", TILEPRO_OPC_SUBBS_U_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subh", TILE_OPC_SUBH, 0x3, 3, TREG_ZERO, 1, + { "subh", TILEPRO_OPC_SUBH, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subh.sn", TILE_OPC_SUBH_SN, 0x3, 3, TREG_SN, 1, + { "subh.sn", TILEPRO_OPC_SUBH_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subhs", TILE_OPC_SUBHS, 0x3, 3, TREG_ZERO, 1, + { "subhs", TILEPRO_OPC_SUBHS, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subhs.sn", TILE_OPC_SUBHS_SN, 0x3, 3, TREG_SN, 1, + { "subhs.sn", TILEPRO_OPC_SUBHS_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subs", TILE_OPC_SUBS, 0x3, 3, TREG_ZERO, 1, + { "subs", TILEPRO_OPC_SUBS, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "subs.sn", TILE_OPC_SUBS_SN, 0x3, 3, 
TREG_SN, 1, + { "subs.sn", TILEPRO_OPC_SUBS_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "sw", TILE_OPC_SW, 0x12, 2, TREG_ZERO, 1, + { "sw", TILEPRO_OPC_SW, 0x12, 2, TREG_ZERO, 1, { { 0, }, { 10, 17 }, { 0, }, { 0, }, { 15, 36 } }, }, - { "swadd", TILE_OPC_SWADD, 0x2, 3, TREG_ZERO, 1, + { "swadd", TILEPRO_OPC_SWADD, 0x2, 3, TREG_ZERO, 1, { { 0, }, { 24, 17, 37 }, { 0, }, { 0, }, { 0, } }, }, - { "swint0", TILE_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { "swint0", TILEPRO_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "swint1", TILE_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { "swint1", TILEPRO_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "swint2", TILE_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { "swint2", TILEPRO_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "swint3", TILE_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { "swint3", TILEPRO_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, { { 0, }, { }, { 0, }, { 0, }, { 0, } }, }, - { "tblidxb0", TILE_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { "tblidxb0", TILEPRO_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, }, - { "tblidxb0.sn", TILE_OPC_TBLIDXB0_SN, 0x1, 2, TREG_SN, 1, + { "tblidxb0.sn", TILEPRO_OPC_TBLIDXB0_SN, 0x1, 2, TREG_SN, 1, { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "tblidxb1", TILE_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { "tblidxb1", TILEPRO_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, }, - { "tblidxb1.sn", TILE_OPC_TBLIDXB1_SN, 0x1, 2, TREG_SN, 1, + { "tblidxb1.sn", TILEPRO_OPC_TBLIDXB1_SN, 0x1, 2, TREG_SN, 1, { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "tblidxb2", TILE_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { "tblidxb2", TILEPRO_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, }, - { "tblidxb2.sn", TILE_OPC_TBLIDXB2_SN, 0x1, 2, TREG_SN, 1, + { "tblidxb2.sn", 
TILEPRO_OPC_TBLIDXB2_SN, 0x1, 2, TREG_SN, 1, { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "tblidxb3", TILE_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { "tblidxb3", TILEPRO_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, { { 21, 8 }, { 0, }, { 31, 12 }, { 0, }, { 0, } }, }, - { "tblidxb3.sn", TILE_OPC_TBLIDXB3_SN, 0x1, 2, TREG_SN, 1, + { "tblidxb3.sn", TILEPRO_OPC_TBLIDXB3_SN, 0x1, 2, TREG_SN, 1, { { 21, 8 }, { 0, }, { 0, }, { 0, }, { 0, } }, }, - { "tns", TILE_OPC_TNS, 0x2, 2, TREG_ZERO, 1, + { "tns", TILEPRO_OPC_TNS, 0x2, 2, TREG_ZERO, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "tns.sn", TILE_OPC_TNS_SN, 0x2, 2, TREG_SN, 1, + { "tns.sn", TILEPRO_OPC_TNS_SN, 0x2, 2, TREG_SN, 1, { { 0, }, { 9, 10 }, { 0, }, { 0, }, { 0, } }, }, - { "wh64", TILE_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { "wh64", TILEPRO_OPC_WH64, 0x2, 1, TREG_ZERO, 1, { { 0, }, { 10 }, { 0, }, { 0, }, { 0, } }, }, - { "xor", TILE_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { "xor", TILEPRO_OPC_XOR, 0xf, 3, TREG_ZERO, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 11, 12, 18 }, { 13, 14, 19 }, { 0, } }, }, - { "xor.sn", TILE_OPC_XOR_SN, 0x3, 3, TREG_SN, 1, + { "xor.sn", TILEPRO_OPC_XOR_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 16 }, { 9, 10, 17 }, { 0, }, { 0, }, { 0, } }, }, - { "xori", TILE_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { "xori", TILEPRO_OPC_XORI, 0x3, 3, TREG_ZERO, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { "xori.sn", TILE_OPC_XORI_SN, 0x3, 3, TREG_SN, 1, + { "xori.sn", TILEPRO_OPC_XORI_SN, 0x3, 3, TREG_SN, 1, { { 7, 8, 0 }, { 9, 10, 1 }, { 0, }, { 0, }, { 0, } }, }, - { NULL, TILE_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + { NULL, TILEPRO_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, } }; #define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) -#define CHILD(array_index) (TILE_OPC_NONE + (array_index)) +#define CHILD(array_index) (TILEPRO_OPC_NONE + (array_index)) static const unsigned short decode_X0_fsm[1153] = { BITFIELD(22, 9) /* index 0 */, CHILD(513), CHILD(530), CHILD(547), CHILD(564), 
CHILD(596), CHILD(613), - CHILD(630), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, CHILD(663), CHILD(680), CHILD(697), CHILD(714), CHILD(746), - CHILD(763), CHILD(780), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, CHILD(813), CHILD(813), CHILD(813), + CHILD(630), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(663), CHILD(680), CHILD(697), + CHILD(714), CHILD(746), CHILD(763), CHILD(780), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(813), CHILD(813), CHILD(813), 
CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), @@ -1227,7 +1247,8 @@ static const unsigned short decode_X0_fsm[1153] = CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(813), - CHILD(813), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), + CHILD(813), CHILD(813), CHILD(813), CHILD(813), CHILD(828), CHILD(828), + CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), @@ -1237,7 +1258,7 @@ static const unsigned short decode_X0_fsm[1153] = CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), - CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(828), CHILD(843), + CHILD(828), CHILD(828), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), @@ -1248,333 +1269,371 @@ static const unsigned short decode_X0_fsm[1153] = CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), - CHILD(843), CHILD(843), CHILD(843), CHILD(873), CHILD(878), CHILD(883), - CHILD(903), CHILD(908), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - 
TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(913), - CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, CHILD(953), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(988), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, 
TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, CHILD(993), - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, CHILD(1076), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(873), CHILD(878), CHILD(883), CHILD(903), CHILD(908), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, 
TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(913), + CHILD(918), CHILD(923), CHILD(943), CHILD(948), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(953), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(988), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, 
TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(993), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1076), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, 
TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(18, 4) /* index 513 */, - TILE_OPC_NONE, TILE_OPC_ADDB, TILE_OPC_ADDH, TILE_OPC_ADD, - TILE_OPC_ADIFFB_U, TILE_OPC_ADIFFH, TILE_OPC_AND, TILE_OPC_AVGB_U, - TILE_OPC_AVGH, TILE_OPC_CRC32_32, TILE_OPC_CRC32_8, TILE_OPC_INTHB, - TILE_OPC_INTHH, TILE_OPC_INTLB, TILE_OPC_INTLH, TILE_OPC_MAXB_U, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_ADIFFB_U, TILEPRO_OPC_ADIFFH, TILEPRO_OPC_AND, + TILEPRO_OPC_AVGB_U, TILEPRO_OPC_AVGH, TILEPRO_OPC_CRC32_32, + TILEPRO_OPC_CRC32_8, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, + TILEPRO_OPC_INTLB, TILEPRO_OPC_INTLH, TILEPRO_OPC_MAXB_U, BITFIELD(18, 4) /* index 530 */, - TILE_OPC_MAXH, TILE_OPC_MINB_U, TILE_OPC_MINH, TILE_OPC_MNZB, TILE_OPC_MNZH, - TILE_OPC_MNZ, TILE_OPC_MULHHA_SS, TILE_OPC_MULHHA_SU, TILE_OPC_MULHHA_UU, - TILE_OPC_MULHHSA_UU, TILE_OPC_MULHH_SS, TILE_OPC_MULHH_SU, - TILE_OPC_MULHH_UU, TILE_OPC_MULHLA_SS, TILE_OPC_MULHLA_SU, - TILE_OPC_MULHLA_US, + TILEPRO_OPC_MAXH, TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, + TILEPRO_OPC_MNZH, TILEPRO_OPC_MNZ, TILEPRO_OPC_MULHHA_SS, + TILEPRO_OPC_MULHHA_SU, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULHHSA_UU, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_SU, TILEPRO_OPC_MULHH_UU, + TILEPRO_OPC_MULHLA_SS, TILEPRO_OPC_MULHLA_SU, TILEPRO_OPC_MULHLA_US, BITFIELD(18, 4) /* index 547 */, - TILE_OPC_MULHLA_UU, TILE_OPC_MULHLSA_UU, TILE_OPC_MULHL_SS, - TILE_OPC_MULHL_SU, TILE_OPC_MULHL_US, TILE_OPC_MULHL_UU, TILE_OPC_MULLLA_SS, - TILE_OPC_MULLLA_SU, TILE_OPC_MULLLA_UU, TILE_OPC_MULLLSA_UU, - TILE_OPC_MULLL_SS, TILE_OPC_MULLL_SU, TILE_OPC_MULLL_UU, TILE_OPC_MVNZ, - TILE_OPC_MVZ, TILE_OPC_MZB, + TILEPRO_OPC_MULHLA_UU, TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_MULHL_SS, + TILEPRO_OPC_MULHL_SU, TILEPRO_OPC_MULHL_US, TILEPRO_OPC_MULHL_UU, + 
TILEPRO_OPC_MULLLA_SS, TILEPRO_OPC_MULLLA_SU, TILEPRO_OPC_MULLLA_UU, + TILEPRO_OPC_MULLLSA_UU, TILEPRO_OPC_MULLL_SS, TILEPRO_OPC_MULLL_SU, + TILEPRO_OPC_MULLL_UU, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZB, BITFIELD(18, 4) /* index 564 */, - TILE_OPC_MZH, TILE_OPC_MZ, TILE_OPC_NOR, CHILD(581), TILE_OPC_PACKHB, - TILE_OPC_PACKLB, TILE_OPC_RL, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_S3A, - TILE_OPC_SADAB_U, TILE_OPC_SADAH, TILE_OPC_SADAH_U, TILE_OPC_SADB_U, - TILE_OPC_SADH, TILE_OPC_SADH_U, + TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, TILEPRO_OPC_NOR, CHILD(581), + TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, TILEPRO_OPC_RL, TILEPRO_OPC_S1A, + TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, TILEPRO_OPC_SADAB_U, TILEPRO_OPC_SADAH, + TILEPRO_OPC_SADAH_U, TILEPRO_OPC_SADB_U, TILEPRO_OPC_SADH, + TILEPRO_OPC_SADH_U, BITFIELD(12, 2) /* index 581 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(586), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(586), BITFIELD(14, 2) /* index 586 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(591), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(591), BITFIELD(16, 2) /* index 591 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, BITFIELD(18, 4) /* index 596 */, - TILE_OPC_SEQB, TILE_OPC_SEQH, TILE_OPC_SEQ, TILE_OPC_SHLB, TILE_OPC_SHLH, - TILE_OPC_SHL, TILE_OPC_SHRB, TILE_OPC_SHRH, TILE_OPC_SHR, TILE_OPC_SLTB, - TILE_OPC_SLTB_U, TILE_OPC_SLTEB, TILE_OPC_SLTEB_U, TILE_OPC_SLTEH, - TILE_OPC_SLTEH_U, TILE_OPC_SLTE, + TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, TILEPRO_OPC_SHLB, + TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, TILEPRO_OPC_SHRH, + TILEPRO_OPC_SHR, TILEPRO_OPC_SLTB, TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, + TILEPRO_OPC_SLTEB_U, TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, + TILEPRO_OPC_SLTE, BITFIELD(18, 4) /* index 613 */, - TILE_OPC_SLTE_U, TILE_OPC_SLTH, TILE_OPC_SLTH_U, TILE_OPC_SLT, - TILE_OPC_SLT_U, TILE_OPC_SNEB, TILE_OPC_SNEH, 
TILE_OPC_SNE, TILE_OPC_SRAB, - TILE_OPC_SRAH, TILE_OPC_SRA, TILE_OPC_SUBB, TILE_OPC_SUBH, TILE_OPC_SUB, - TILE_OPC_XOR, TILE_OPC_DWORD_ALIGN, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, TILEPRO_OPC_XOR, TILEPRO_OPC_DWORD_ALIGN, BITFIELD(18, 3) /* index 630 */, CHILD(639), CHILD(642), CHILD(645), CHILD(648), CHILD(651), CHILD(654), CHILD(657), CHILD(660), BITFIELD(21, 1) /* index 639 */, - TILE_OPC_ADDS, TILE_OPC_NONE, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 642 */, - TILE_OPC_SUBS, TILE_OPC_NONE, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 645 */, - TILE_OPC_ADDBS_U, TILE_OPC_NONE, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 648 */, - TILE_OPC_ADDHS, TILE_OPC_NONE, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 651 */, - TILE_OPC_SUBBS_U, TILE_OPC_NONE, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 654 */, - TILE_OPC_SUBHS, TILE_OPC_NONE, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 657 */, - TILE_OPC_PACKHS, TILE_OPC_NONE, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 660 */, - TILE_OPC_PACKBS_U, TILE_OPC_NONE, + TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, BITFIELD(18, 4) /* index 663 */, - TILE_OPC_NONE, TILE_OPC_ADDB_SN, TILE_OPC_ADDH_SN, TILE_OPC_ADD_SN, - TILE_OPC_ADIFFB_U_SN, TILE_OPC_ADIFFH_SN, TILE_OPC_AND_SN, - TILE_OPC_AVGB_U_SN, TILE_OPC_AVGH_SN, TILE_OPC_CRC32_32_SN, - TILE_OPC_CRC32_8_SN, TILE_OPC_INTHB_SN, TILE_OPC_INTHH_SN, - TILE_OPC_INTLB_SN, TILE_OPC_INTLH_SN, TILE_OPC_MAXB_U_SN, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_ADIFFB_U_SN, TILEPRO_OPC_ADIFFH_SN, + TILEPRO_OPC_AND_SN, TILEPRO_OPC_AVGB_U_SN, TILEPRO_OPC_AVGH_SN, + TILEPRO_OPC_CRC32_32_SN, 
TILEPRO_OPC_CRC32_8_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_MAXB_U_SN, BITFIELD(18, 4) /* index 680 */, - TILE_OPC_MAXH_SN, TILE_OPC_MINB_U_SN, TILE_OPC_MINH_SN, TILE_OPC_MNZB_SN, - TILE_OPC_MNZH_SN, TILE_OPC_MNZ_SN, TILE_OPC_MULHHA_SS_SN, - TILE_OPC_MULHHA_SU_SN, TILE_OPC_MULHHA_UU_SN, TILE_OPC_MULHHSA_UU_SN, - TILE_OPC_MULHH_SS_SN, TILE_OPC_MULHH_SU_SN, TILE_OPC_MULHH_UU_SN, - TILE_OPC_MULHLA_SS_SN, TILE_OPC_MULHLA_SU_SN, TILE_OPC_MULHLA_US_SN, + TILEPRO_OPC_MAXH_SN, TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, + TILEPRO_OPC_MNZB_SN, TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, + TILEPRO_OPC_MULHHA_SS_SN, TILEPRO_OPC_MULHHA_SU_SN, + TILEPRO_OPC_MULHHA_UU_SN, TILEPRO_OPC_MULHHSA_UU_SN, + TILEPRO_OPC_MULHH_SS_SN, TILEPRO_OPC_MULHH_SU_SN, TILEPRO_OPC_MULHH_UU_SN, + TILEPRO_OPC_MULHLA_SS_SN, TILEPRO_OPC_MULHLA_SU_SN, + TILEPRO_OPC_MULHLA_US_SN, BITFIELD(18, 4) /* index 697 */, - TILE_OPC_MULHLA_UU_SN, TILE_OPC_MULHLSA_UU_SN, TILE_OPC_MULHL_SS_SN, - TILE_OPC_MULHL_SU_SN, TILE_OPC_MULHL_US_SN, TILE_OPC_MULHL_UU_SN, - TILE_OPC_MULLLA_SS_SN, TILE_OPC_MULLLA_SU_SN, TILE_OPC_MULLLA_UU_SN, - TILE_OPC_MULLLSA_UU_SN, TILE_OPC_MULLL_SS_SN, TILE_OPC_MULLL_SU_SN, - TILE_OPC_MULLL_UU_SN, TILE_OPC_MVNZ_SN, TILE_OPC_MVZ_SN, TILE_OPC_MZB_SN, + TILEPRO_OPC_MULHLA_UU_SN, TILEPRO_OPC_MULHLSA_UU_SN, + TILEPRO_OPC_MULHL_SS_SN, TILEPRO_OPC_MULHL_SU_SN, TILEPRO_OPC_MULHL_US_SN, + TILEPRO_OPC_MULHL_UU_SN, TILEPRO_OPC_MULLLA_SS_SN, TILEPRO_OPC_MULLLA_SU_SN, + TILEPRO_OPC_MULLLA_UU_SN, TILEPRO_OPC_MULLLSA_UU_SN, + TILEPRO_OPC_MULLL_SS_SN, TILEPRO_OPC_MULLL_SU_SN, TILEPRO_OPC_MULLL_UU_SN, + TILEPRO_OPC_MVNZ_SN, TILEPRO_OPC_MVZ_SN, TILEPRO_OPC_MZB_SN, BITFIELD(18, 4) /* index 714 */, - TILE_OPC_MZH_SN, TILE_OPC_MZ_SN, TILE_OPC_NOR_SN, CHILD(731), - TILE_OPC_PACKHB_SN, TILE_OPC_PACKLB_SN, TILE_OPC_RL_SN, TILE_OPC_S1A_SN, - TILE_OPC_S2A_SN, TILE_OPC_S3A_SN, TILE_OPC_SADAB_U_SN, TILE_OPC_SADAH_SN, - TILE_OPC_SADAH_U_SN, 
TILE_OPC_SADB_U_SN, TILE_OPC_SADH_SN, - TILE_OPC_SADH_U_SN, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(731), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, + TILEPRO_OPC_SADAB_U_SN, TILEPRO_OPC_SADAH_SN, TILEPRO_OPC_SADAH_U_SN, + TILEPRO_OPC_SADB_U_SN, TILEPRO_OPC_SADH_SN, TILEPRO_OPC_SADH_U_SN, BITFIELD(12, 2) /* index 731 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(736), + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(736), BITFIELD(14, 2) /* index 736 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(741), + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(741), BITFIELD(16, 2) /* index 741 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_MOVE_SN, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, BITFIELD(18, 4) /* index 746 */, - TILE_OPC_SEQB_SN, TILE_OPC_SEQH_SN, TILE_OPC_SEQ_SN, TILE_OPC_SHLB_SN, - TILE_OPC_SHLH_SN, TILE_OPC_SHL_SN, TILE_OPC_SHRB_SN, TILE_OPC_SHRH_SN, - TILE_OPC_SHR_SN, TILE_OPC_SLTB_SN, TILE_OPC_SLTB_U_SN, TILE_OPC_SLTEB_SN, - TILE_OPC_SLTEB_U_SN, TILE_OPC_SLTEH_SN, TILE_OPC_SLTEH_U_SN, - TILE_OPC_SLTE_SN, + TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, TILEPRO_OPC_SEQ_SN, + TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, TILEPRO_OPC_SHL_SN, + TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, TILEPRO_OPC_SHR_SN, + TILEPRO_OPC_SLTB_SN, TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, + TILEPRO_OPC_SLTEB_U_SN, TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, + TILEPRO_OPC_SLTE_SN, BITFIELD(18, 4) /* index 763 */, - TILE_OPC_SLTE_U_SN, TILE_OPC_SLTH_SN, TILE_OPC_SLTH_U_SN, TILE_OPC_SLT_SN, - TILE_OPC_SLT_U_SN, TILE_OPC_SNEB_SN, TILE_OPC_SNEH_SN, TILE_OPC_SNE_SN, - TILE_OPC_SRAB_SN, TILE_OPC_SRAH_SN, TILE_OPC_SRA_SN, TILE_OPC_SUBB_SN, - TILE_OPC_SUBH_SN, TILE_OPC_SUB_SN, TILE_OPC_XOR_SN, TILE_OPC_DWORD_ALIGN_SN, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, 
TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, TILEPRO_OPC_XOR_SN, + TILEPRO_OPC_DWORD_ALIGN_SN, BITFIELD(18, 3) /* index 780 */, CHILD(789), CHILD(792), CHILD(795), CHILD(798), CHILD(801), CHILD(804), CHILD(807), CHILD(810), BITFIELD(21, 1) /* index 789 */, - TILE_OPC_ADDS_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 792 */, - TILE_OPC_SUBS_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 795 */, - TILE_OPC_ADDBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 798 */, - TILE_OPC_ADDHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 801 */, - TILE_OPC_SUBBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 804 */, - TILE_OPC_SUBHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 807 */, - TILE_OPC_PACKHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, BITFIELD(21, 1) /* index 810 */, - TILE_OPC_PACKBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, BITFIELD(6, 2) /* index 813 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(818), + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(818), BITFIELD(8, 2) /* index 818 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(823), + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(823), BITFIELD(10, 2) /* index 823 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_MOVELI_SN, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, BITFIELD(6, 2) /* index 828 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, 
CHILD(833), + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(833), BITFIELD(8, 2) /* index 833 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(838), + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(838), BITFIELD(10, 2) /* index 838 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_MOVELI, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, BITFIELD(0, 2) /* index 843 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(848), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(848), BITFIELD(2, 2) /* index 848 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(853), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(853), BITFIELD(4, 2) /* index 853 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(858), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(858), BITFIELD(6, 2) /* index 858 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(863), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(863), BITFIELD(8, 2) /* index 863 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(868), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(868), BITFIELD(10, 2) /* index 868 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_INFOL, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, BITFIELD(20, 2) /* index 873 */, - TILE_OPC_NONE, TILE_OPC_ADDIB, TILE_OPC_ADDIH, TILE_OPC_ADDI, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, BITFIELD(20, 2) /* index 878 */, - TILE_OPC_MAXIB_U, TILE_OPC_MAXIH, TILE_OPC_MINIB_U, TILE_OPC_MINIH, + TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MINIB_U, + TILEPRO_OPC_MINIH, BITFIELD(20, 2) /* index 883 */, - CHILD(888), TILE_OPC_SEQIB, TILE_OPC_SEQIH, TILE_OPC_SEQI, + CHILD(888), TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, BITFIELD(6, 2) /* index 888 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, 
CHILD(893), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(893), BITFIELD(8, 2) /* index 893 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(898), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(898), BITFIELD(10, 2) /* index 898 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, BITFIELD(20, 2) /* index 903 */, - TILE_OPC_SLTIB, TILE_OPC_SLTIB_U, TILE_OPC_SLTIH, TILE_OPC_SLTIH_U, + TILEPRO_OPC_SLTIB, TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, + TILEPRO_OPC_SLTIH_U, BITFIELD(20, 2) /* index 908 */, - TILE_OPC_SLTI, TILE_OPC_SLTI_U, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(20, 2) /* index 913 */, - TILE_OPC_NONE, TILE_OPC_ADDIB_SN, TILE_OPC_ADDIH_SN, TILE_OPC_ADDI_SN, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, BITFIELD(20, 2) /* index 918 */, - TILE_OPC_MAXIB_U_SN, TILE_OPC_MAXIH_SN, TILE_OPC_MINIB_U_SN, - TILE_OPC_MINIH_SN, + TILEPRO_OPC_MAXIB_U_SN, TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MINIB_U_SN, + TILEPRO_OPC_MINIH_SN, BITFIELD(20, 2) /* index 923 */, - CHILD(928), TILE_OPC_SEQIB_SN, TILE_OPC_SEQIH_SN, TILE_OPC_SEQI_SN, + CHILD(928), TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, BITFIELD(6, 2) /* index 928 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(933), + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(933), BITFIELD(8, 2) /* index 933 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(938), + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(938), BITFIELD(10, 2) /* index 938 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_MOVEI_SN, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, BITFIELD(20, 2) /* index 943 */, - TILE_OPC_SLTIB_SN, TILE_OPC_SLTIB_U_SN, TILE_OPC_SLTIH_SN, - TILE_OPC_SLTIH_U_SN, + 
TILEPRO_OPC_SLTIB_SN, TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, + TILEPRO_OPC_SLTIH_U_SN, BITFIELD(20, 2) /* index 948 */, - TILE_OPC_SLTI_SN, TILE_OPC_SLTI_U_SN, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, BITFIELD(20, 2) /* index 953 */, - TILE_OPC_NONE, CHILD(958), TILE_OPC_XORI, TILE_OPC_NONE, + TILEPRO_OPC_NONE, CHILD(958), TILEPRO_OPC_XORI, TILEPRO_OPC_NONE, BITFIELD(0, 2) /* index 958 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(963), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(963), BITFIELD(2, 2) /* index 963 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(968), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(968), BITFIELD(4, 2) /* index 968 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(973), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(973), BITFIELD(6, 2) /* index 973 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(978), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(978), BITFIELD(8, 2) /* index 978 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(983), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(983), BITFIELD(10, 2) /* index 983 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, BITFIELD(20, 2) /* index 988 */, - TILE_OPC_NONE, TILE_OPC_ANDI_SN, TILE_OPC_XORI_SN, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_NONE, BITFIELD(17, 5) /* index 993 */, - TILE_OPC_NONE, TILE_OPC_RLI, TILE_OPC_SHLIB, TILE_OPC_SHLIH, TILE_OPC_SHLI, - TILE_OPC_SHRIB, TILE_OPC_SHRIH, TILE_OPC_SHRI, TILE_OPC_SRAIB, - TILE_OPC_SRAIH, TILE_OPC_SRAI, CHILD(1026), TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - 
TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLIB, TILEPRO_OPC_SHLIH, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRIB, TILEPRO_OPC_SHRIH, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_SRAIH, TILEPRO_OPC_SRAI, CHILD(1026), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(12, 4) /* index 1026 */, - TILE_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052), + TILEPRO_OPC_NONE, CHILD(1043), CHILD(1046), CHILD(1049), CHILD(1052), CHILD(1055), CHILD(1058), CHILD(1061), CHILD(1064), CHILD(1067), - CHILD(1070), CHILD(1073), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, + CHILD(1070), CHILD(1073), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1043 */, - TILE_OPC_BITX, TILE_OPC_NONE, + TILEPRO_OPC_BITX, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1046 */, - TILE_OPC_BYTEX, TILE_OPC_NONE, + TILEPRO_OPC_BYTEX, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1049 */, - TILE_OPC_CLZ, TILE_OPC_NONE, + TILEPRO_OPC_CLZ, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1052 */, - TILE_OPC_CTZ, TILE_OPC_NONE, + TILEPRO_OPC_CTZ, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1055 */, - TILE_OPC_FNOP, TILE_OPC_NONE, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1058 */, - TILE_OPC_NOP, TILE_OPC_NONE, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1061 */, - TILE_OPC_PCNT, TILE_OPC_NONE, + TILEPRO_OPC_PCNT, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1064 */, - TILE_OPC_TBLIDXB0, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_NONE, BITFIELD(16, 1) 
/* index 1067 */, - TILE_OPC_TBLIDXB1, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1070 */, - TILE_OPC_TBLIDXB2, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB2, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1073 */, - TILE_OPC_TBLIDXB3, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, BITFIELD(17, 5) /* index 1076 */, - TILE_OPC_NONE, TILE_OPC_RLI_SN, TILE_OPC_SHLIB_SN, TILE_OPC_SHLIH_SN, - TILE_OPC_SHLI_SN, TILE_OPC_SHRIB_SN, TILE_OPC_SHRIH_SN, TILE_OPC_SHRI_SN, - TILE_OPC_SRAIB_SN, TILE_OPC_SRAIH_SN, TILE_OPC_SRAI_SN, CHILD(1109), - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI_SN, TILEPRO_OPC_SHLIB_SN, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_SHRIB_SN, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_SRAIB_SN, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_SRAI_SN, CHILD(1109), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(12, 4) /* index 1109 */, - TILE_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135), + TILEPRO_OPC_NONE, CHILD(1126), CHILD(1129), CHILD(1132), CHILD(1135), CHILD(1055), CHILD(1058), CHILD(1138), CHILD(1141), CHILD(1144), - CHILD(1147), CHILD(1150), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, + CHILD(1147), CHILD(1150), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1126 */, - TILE_OPC_BITX_SN, 
TILE_OPC_NONE, + TILEPRO_OPC_BITX_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1129 */, - TILE_OPC_BYTEX_SN, TILE_OPC_NONE, + TILEPRO_OPC_BYTEX_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1132 */, - TILE_OPC_CLZ_SN, TILE_OPC_NONE, + TILEPRO_OPC_CLZ_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1135 */, - TILE_OPC_CTZ_SN, TILE_OPC_NONE, + TILEPRO_OPC_CTZ_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1138 */, - TILE_OPC_PCNT_SN, TILE_OPC_NONE, + TILEPRO_OPC_PCNT_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1141 */, - TILE_OPC_TBLIDXB0_SN, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB0_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1144 */, - TILE_OPC_TBLIDXB1_SN, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB1_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1147 */, - TILE_OPC_TBLIDXB2_SN, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB2_SN, TILEPRO_OPC_NONE, BITFIELD(16, 1) /* index 1150 */, - TILE_OPC_TBLIDXB3_SN, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB3_SN, TILEPRO_OPC_NONE, }; static const unsigned short decode_X1_fsm[1540] = { BITFIELD(54, 9) /* index 0 */, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, CHILD(513), CHILD(561), CHILD(594), - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(641), CHILD(689), - CHILD(722), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, 
TILE_OPC_NONE, TILE_OPC_NONE, CHILD(766), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(513), CHILD(561), CHILD(594), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(641), + CHILD(689), CHILD(722), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), CHILD(766), @@ -1596,594 +1655,641 @@ static const unsigned short decode_X1_fsm[1540] = CHILD(826), CHILD(826), CHILD(826), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), CHILD(843), - CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, CHILD(941), CHILD(950), CHILD(974), 
CHILD(983), - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, - TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, TILE_OPC_MM, CHILD(992), - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - CHILD(1334), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, 
TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, TILE_OPC_J, - TILE_OPC_J, TILE_OPC_J, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, TILE_OPC_JAL, - TILE_OPC_JAL, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, 
TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(843), CHILD(860), CHILD(899), CHILD(923), CHILD(932), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(941), CHILD(950), CHILD(974), CHILD(983), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, + TILEPRO_OPC_MM, TILEPRO_OPC_MM, TILEPRO_OPC_MM, CHILD(992), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, CHILD(1334), + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, 
TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, + TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_J, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, 
TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, + TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_JAL, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(49, 5) /* index 513 */, - TILE_OPC_NONE, TILE_OPC_ADDB, TILE_OPC_ADDH, TILE_OPC_ADD, TILE_OPC_AND, - 
TILE_OPC_INTHB, TILE_OPC_INTHH, TILE_OPC_INTLB, TILE_OPC_INTLH, - TILE_OPC_JALRP, TILE_OPC_JALR, TILE_OPC_JRP, TILE_OPC_JR, TILE_OPC_LNK, - TILE_OPC_MAXB_U, TILE_OPC_MAXH, TILE_OPC_MINB_U, TILE_OPC_MINH, - TILE_OPC_MNZB, TILE_OPC_MNZH, TILE_OPC_MNZ, TILE_OPC_MZB, TILE_OPC_MZH, - TILE_OPC_MZ, TILE_OPC_NOR, CHILD(546), TILE_OPC_PACKHB, TILE_OPC_PACKLB, - TILE_OPC_RL, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_S3A, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB, TILEPRO_OPC_ADDH, TILEPRO_OPC_ADD, + TILEPRO_OPC_AND, TILEPRO_OPC_INTHB, TILEPRO_OPC_INTHH, TILEPRO_OPC_INTLB, + TILEPRO_OPC_INTLH, TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, + TILEPRO_OPC_JR, TILEPRO_OPC_LNK, TILEPRO_OPC_MAXB_U, TILEPRO_OPC_MAXH, + TILEPRO_OPC_MINB_U, TILEPRO_OPC_MINH, TILEPRO_OPC_MNZB, TILEPRO_OPC_MNZH, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MZB, TILEPRO_OPC_MZH, TILEPRO_OPC_MZ, + TILEPRO_OPC_NOR, CHILD(546), TILEPRO_OPC_PACKHB, TILEPRO_OPC_PACKLB, + TILEPRO_OPC_RL, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_S3A, BITFIELD(43, 2) /* index 546 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(551), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(551), BITFIELD(45, 2) /* index 551 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(556), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(556), BITFIELD(47, 2) /* index 556 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, BITFIELD(49, 5) /* index 561 */, - TILE_OPC_SB, TILE_OPC_SEQB, TILE_OPC_SEQH, TILE_OPC_SEQ, TILE_OPC_SHLB, - TILE_OPC_SHLH, TILE_OPC_SHL, TILE_OPC_SHRB, TILE_OPC_SHRH, TILE_OPC_SHR, - TILE_OPC_SH, TILE_OPC_SLTB, TILE_OPC_SLTB_U, TILE_OPC_SLTEB, - TILE_OPC_SLTEB_U, TILE_OPC_SLTEH, TILE_OPC_SLTEH_U, TILE_OPC_SLTE, - TILE_OPC_SLTE_U, TILE_OPC_SLTH, TILE_OPC_SLTH_U, TILE_OPC_SLT, - TILE_OPC_SLT_U, TILE_OPC_SNEB, TILE_OPC_SNEH, TILE_OPC_SNE, TILE_OPC_SRAB, - TILE_OPC_SRAH, TILE_OPC_SRA, TILE_OPC_SUBB, TILE_OPC_SUBH, TILE_OPC_SUB, + TILEPRO_OPC_SB, 
TILEPRO_OPC_SEQB, TILEPRO_OPC_SEQH, TILEPRO_OPC_SEQ, + TILEPRO_OPC_SHLB, TILEPRO_OPC_SHLH, TILEPRO_OPC_SHL, TILEPRO_OPC_SHRB, + TILEPRO_OPC_SHRH, TILEPRO_OPC_SHR, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB, + TILEPRO_OPC_SLTB_U, TILEPRO_OPC_SLTEB, TILEPRO_OPC_SLTEB_U, + TILEPRO_OPC_SLTEH, TILEPRO_OPC_SLTEH_U, TILEPRO_OPC_SLTE, + TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLTH, TILEPRO_OPC_SLTH_U, TILEPRO_OPC_SLT, + TILEPRO_OPC_SLT_U, TILEPRO_OPC_SNEB, TILEPRO_OPC_SNEH, TILEPRO_OPC_SNE, + TILEPRO_OPC_SRAB, TILEPRO_OPC_SRAH, TILEPRO_OPC_SRA, TILEPRO_OPC_SUBB, + TILEPRO_OPC_SUBH, TILEPRO_OPC_SUB, BITFIELD(49, 4) /* index 594 */, CHILD(611), CHILD(614), CHILD(617), CHILD(620), CHILD(623), CHILD(626), - CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(629), CHILD(632), CHILD(635), CHILD(638), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 611 */, - TILE_OPC_SW, TILE_OPC_NONE, + TILEPRO_OPC_SW, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 614 */, - TILE_OPC_XOR, TILE_OPC_NONE, + TILEPRO_OPC_XOR, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 617 */, - TILE_OPC_ADDS, TILE_OPC_NONE, + TILEPRO_OPC_ADDS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 620 */, - TILE_OPC_SUBS, TILE_OPC_NONE, + TILEPRO_OPC_SUBS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 623 */, - TILE_OPC_ADDBS_U, TILE_OPC_NONE, + TILEPRO_OPC_ADDBS_U, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 626 */, - TILE_OPC_ADDHS, TILE_OPC_NONE, + TILEPRO_OPC_ADDHS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 629 */, - TILE_OPC_SUBBS_U, TILE_OPC_NONE, + TILEPRO_OPC_SUBBS_U, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 632 */, - TILE_OPC_SUBHS, TILE_OPC_NONE, + TILEPRO_OPC_SUBHS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 635 */, - TILE_OPC_PACKHS, TILE_OPC_NONE, + TILEPRO_OPC_PACKHS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 638 */, - TILE_OPC_PACKBS_U, TILE_OPC_NONE, 
+ TILEPRO_OPC_PACKBS_U, TILEPRO_OPC_NONE, BITFIELD(49, 5) /* index 641 */, - TILE_OPC_NONE, TILE_OPC_ADDB_SN, TILE_OPC_ADDH_SN, TILE_OPC_ADD_SN, - TILE_OPC_AND_SN, TILE_OPC_INTHB_SN, TILE_OPC_INTHH_SN, TILE_OPC_INTLB_SN, - TILE_OPC_INTLH_SN, TILE_OPC_JALRP, TILE_OPC_JALR, TILE_OPC_JRP, TILE_OPC_JR, - TILE_OPC_LNK_SN, TILE_OPC_MAXB_U_SN, TILE_OPC_MAXH_SN, TILE_OPC_MINB_U_SN, - TILE_OPC_MINH_SN, TILE_OPC_MNZB_SN, TILE_OPC_MNZH_SN, TILE_OPC_MNZ_SN, - TILE_OPC_MZB_SN, TILE_OPC_MZH_SN, TILE_OPC_MZ_SN, TILE_OPC_NOR_SN, - CHILD(674), TILE_OPC_PACKHB_SN, TILE_OPC_PACKLB_SN, TILE_OPC_RL_SN, - TILE_OPC_S1A_SN, TILE_OPC_S2A_SN, TILE_OPC_S3A_SN, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDB_SN, TILEPRO_OPC_ADDH_SN, + TILEPRO_OPC_ADD_SN, TILEPRO_OPC_AND_SN, TILEPRO_OPC_INTHB_SN, + TILEPRO_OPC_INTHH_SN, TILEPRO_OPC_INTLB_SN, TILEPRO_OPC_INTLH_SN, + TILEPRO_OPC_JALRP, TILEPRO_OPC_JALR, TILEPRO_OPC_JRP, TILEPRO_OPC_JR, + TILEPRO_OPC_LNK_SN, TILEPRO_OPC_MAXB_U_SN, TILEPRO_OPC_MAXH_SN, + TILEPRO_OPC_MINB_U_SN, TILEPRO_OPC_MINH_SN, TILEPRO_OPC_MNZB_SN, + TILEPRO_OPC_MNZH_SN, TILEPRO_OPC_MNZ_SN, TILEPRO_OPC_MZB_SN, + TILEPRO_OPC_MZH_SN, TILEPRO_OPC_MZ_SN, TILEPRO_OPC_NOR_SN, CHILD(674), + TILEPRO_OPC_PACKHB_SN, TILEPRO_OPC_PACKLB_SN, TILEPRO_OPC_RL_SN, + TILEPRO_OPC_S1A_SN, TILEPRO_OPC_S2A_SN, TILEPRO_OPC_S3A_SN, BITFIELD(43, 2) /* index 674 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(679), + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(679), BITFIELD(45, 2) /* index 679 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, CHILD(684), + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, CHILD(684), BITFIELD(47, 2) /* index 684 */, - TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_OR_SN, TILE_OPC_MOVE_SN, + TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, TILEPRO_OPC_OR_SN, + TILEPRO_OPC_MOVE_SN, BITFIELD(49, 5) /* index 689 */, - TILE_OPC_SB, TILE_OPC_SEQB_SN, TILE_OPC_SEQH_SN, TILE_OPC_SEQ_SN, - TILE_OPC_SHLB_SN, TILE_OPC_SHLH_SN, TILE_OPC_SHL_SN, TILE_OPC_SHRB_SN, - 
TILE_OPC_SHRH_SN, TILE_OPC_SHR_SN, TILE_OPC_SH, TILE_OPC_SLTB_SN, - TILE_OPC_SLTB_U_SN, TILE_OPC_SLTEB_SN, TILE_OPC_SLTEB_U_SN, - TILE_OPC_SLTEH_SN, TILE_OPC_SLTEH_U_SN, TILE_OPC_SLTE_SN, - TILE_OPC_SLTE_U_SN, TILE_OPC_SLTH_SN, TILE_OPC_SLTH_U_SN, TILE_OPC_SLT_SN, - TILE_OPC_SLT_U_SN, TILE_OPC_SNEB_SN, TILE_OPC_SNEH_SN, TILE_OPC_SNE_SN, - TILE_OPC_SRAB_SN, TILE_OPC_SRAH_SN, TILE_OPC_SRA_SN, TILE_OPC_SUBB_SN, - TILE_OPC_SUBH_SN, TILE_OPC_SUB_SN, + TILEPRO_OPC_SB, TILEPRO_OPC_SEQB_SN, TILEPRO_OPC_SEQH_SN, + TILEPRO_OPC_SEQ_SN, TILEPRO_OPC_SHLB_SN, TILEPRO_OPC_SHLH_SN, + TILEPRO_OPC_SHL_SN, TILEPRO_OPC_SHRB_SN, TILEPRO_OPC_SHRH_SN, + TILEPRO_OPC_SHR_SN, TILEPRO_OPC_SH, TILEPRO_OPC_SLTB_SN, + TILEPRO_OPC_SLTB_U_SN, TILEPRO_OPC_SLTEB_SN, TILEPRO_OPC_SLTEB_U_SN, + TILEPRO_OPC_SLTEH_SN, TILEPRO_OPC_SLTEH_U_SN, TILEPRO_OPC_SLTE_SN, + TILEPRO_OPC_SLTE_U_SN, TILEPRO_OPC_SLTH_SN, TILEPRO_OPC_SLTH_U_SN, + TILEPRO_OPC_SLT_SN, TILEPRO_OPC_SLT_U_SN, TILEPRO_OPC_SNEB_SN, + TILEPRO_OPC_SNEH_SN, TILEPRO_OPC_SNE_SN, TILEPRO_OPC_SRAB_SN, + TILEPRO_OPC_SRAH_SN, TILEPRO_OPC_SRA_SN, TILEPRO_OPC_SUBB_SN, + TILEPRO_OPC_SUBH_SN, TILEPRO_OPC_SUB_SN, BITFIELD(49, 4) /* index 722 */, CHILD(611), CHILD(739), CHILD(742), CHILD(745), CHILD(748), CHILD(751), - CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(754), CHILD(757), CHILD(760), CHILD(763), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 739 */, - TILE_OPC_XOR_SN, TILE_OPC_NONE, + TILEPRO_OPC_XOR_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 742 */, - TILE_OPC_ADDS_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDS_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 745 */, - TILE_OPC_SUBS_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBS_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 748 */, - TILE_OPC_ADDBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDBS_U_SN, TILEPRO_OPC_NONE, 
BITFIELD(53, 1) /* index 751 */, - TILE_OPC_ADDHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_ADDHS_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 754 */, - TILE_OPC_SUBBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBBS_U_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 757 */, - TILE_OPC_SUBHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_SUBHS_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 760 */, - TILE_OPC_PACKHS_SN, TILE_OPC_NONE, + TILEPRO_OPC_PACKHS_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 763 */, - TILE_OPC_PACKBS_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_PACKBS_U_SN, TILEPRO_OPC_NONE, BITFIELD(37, 2) /* index 766 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(771), + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(771), BITFIELD(39, 2) /* index 771 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, CHILD(776), + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + CHILD(776), BITFIELD(41, 2) /* index 776 */, - TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_ADDLI_SN, TILE_OPC_MOVELI_SN, + TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, TILEPRO_OPC_ADDLI_SN, + TILEPRO_OPC_MOVELI_SN, BITFIELD(37, 2) /* index 781 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(786), + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(786), BITFIELD(39, 2) /* index 786 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, CHILD(791), + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, CHILD(791), BITFIELD(41, 2) /* index 791 */, - TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_ADDLI, TILE_OPC_MOVELI, + TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_ADDLI, TILEPRO_OPC_MOVELI, BITFIELD(31, 2) /* index 796 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(801), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(801), BITFIELD(33, 2) /* index 801 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(806), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(806), BITFIELD(35, 2) /* index 
806 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(811), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(811), BITFIELD(37, 2) /* index 811 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(816), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(816), BITFIELD(39, 2) /* index 816 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, CHILD(821), + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, CHILD(821), BITFIELD(41, 2) /* index 821 */, - TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_AULI, TILE_OPC_INFOL, + TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_AULI, TILEPRO_OPC_INFOL, BITFIELD(31, 4) /* index 826 */, - TILE_OPC_BZ, TILE_OPC_BZT, TILE_OPC_BNZ, TILE_OPC_BNZT, TILE_OPC_BGZ, - TILE_OPC_BGZT, TILE_OPC_BGEZ, TILE_OPC_BGEZT, TILE_OPC_BLZ, TILE_OPC_BLZT, - TILE_OPC_BLEZ, TILE_OPC_BLEZT, TILE_OPC_BBS, TILE_OPC_BBST, TILE_OPC_BBNS, - TILE_OPC_BBNST, + TILEPRO_OPC_BZ, TILEPRO_OPC_BZT, TILEPRO_OPC_BNZ, TILEPRO_OPC_BNZT, + TILEPRO_OPC_BGZ, TILEPRO_OPC_BGZT, TILEPRO_OPC_BGEZ, TILEPRO_OPC_BGEZT, + TILEPRO_OPC_BLZ, TILEPRO_OPC_BLZT, TILEPRO_OPC_BLEZ, TILEPRO_OPC_BLEZT, + TILEPRO_OPC_BBS, TILEPRO_OPC_BBST, TILEPRO_OPC_BBNS, TILEPRO_OPC_BBNST, BITFIELD(31, 4) /* index 843 */, - TILE_OPC_BZ_SN, TILE_OPC_BZT_SN, TILE_OPC_BNZ_SN, TILE_OPC_BNZT_SN, - TILE_OPC_BGZ_SN, TILE_OPC_BGZT_SN, TILE_OPC_BGEZ_SN, TILE_OPC_BGEZT_SN, - TILE_OPC_BLZ_SN, TILE_OPC_BLZT_SN, TILE_OPC_BLEZ_SN, TILE_OPC_BLEZT_SN, - TILE_OPC_BBS_SN, TILE_OPC_BBST_SN, TILE_OPC_BBNS_SN, TILE_OPC_BBNST_SN, + TILEPRO_OPC_BZ_SN, TILEPRO_OPC_BZT_SN, TILEPRO_OPC_BNZ_SN, + TILEPRO_OPC_BNZT_SN, TILEPRO_OPC_BGZ_SN, TILEPRO_OPC_BGZT_SN, + TILEPRO_OPC_BGEZ_SN, TILEPRO_OPC_BGEZT_SN, TILEPRO_OPC_BLZ_SN, + TILEPRO_OPC_BLZT_SN, TILEPRO_OPC_BLEZ_SN, TILEPRO_OPC_BLEZT_SN, + TILEPRO_OPC_BBS_SN, TILEPRO_OPC_BBST_SN, TILEPRO_OPC_BBNS_SN, + TILEPRO_OPC_BBNST_SN, BITFIELD(51, 3) /* index 860 */, - TILE_OPC_NONE, TILE_OPC_ADDIB, TILE_OPC_ADDIH, TILE_OPC_ADDI, CHILD(869), - TILE_OPC_MAXIB_U, 
TILE_OPC_MAXIH, TILE_OPC_MFSPR, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB, TILEPRO_OPC_ADDIH, TILEPRO_OPC_ADDI, + CHILD(869), TILEPRO_OPC_MAXIB_U, TILEPRO_OPC_MAXIH, TILEPRO_OPC_MFSPR, BITFIELD(31, 2) /* index 869 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(874), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(874), BITFIELD(33, 2) /* index 874 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(879), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(879), BITFIELD(35, 2) /* index 879 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(884), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(884), BITFIELD(37, 2) /* index 884 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(889), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(889), BITFIELD(39, 2) /* index 889 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(894), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(894), BITFIELD(41, 2) /* index 894 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, BITFIELD(51, 3) /* index 899 */, - TILE_OPC_MINIB_U, TILE_OPC_MINIH, TILE_OPC_MTSPR, CHILD(908), - TILE_OPC_SEQIB, TILE_OPC_SEQIH, TILE_OPC_SEQI, TILE_OPC_SLTIB, + TILEPRO_OPC_MINIB_U, TILEPRO_OPC_MINIH, TILEPRO_OPC_MTSPR, CHILD(908), + TILEPRO_OPC_SEQIB, TILEPRO_OPC_SEQIH, TILEPRO_OPC_SEQI, TILEPRO_OPC_SLTIB, BITFIELD(37, 2) /* index 908 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(913), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(913), BITFIELD(39, 2) /* index 913 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(918), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(918), BITFIELD(41, 2) /* index 918 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, BITFIELD(51, 3) /* index 923 */, - TILE_OPC_SLTIB_U, 
TILE_OPC_SLTIH, TILE_OPC_SLTIH_U, TILE_OPC_SLTI, - TILE_OPC_SLTI_U, TILE_OPC_XORI, TILE_OPC_LBADD, TILE_OPC_LBADD_U, + TILEPRO_OPC_SLTIB_U, TILEPRO_OPC_SLTIH, TILEPRO_OPC_SLTIH_U, + TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_XORI, TILEPRO_OPC_LBADD, + TILEPRO_OPC_LBADD_U, BITFIELD(51, 3) /* index 932 */, - TILE_OPC_LHADD, TILE_OPC_LHADD_U, TILE_OPC_LWADD, TILE_OPC_LWADD_NA, - TILE_OPC_SBADD, TILE_OPC_SHADD, TILE_OPC_SWADD, TILE_OPC_NONE, + TILEPRO_OPC_LHADD, TILEPRO_OPC_LHADD_U, TILEPRO_OPC_LWADD, + TILEPRO_OPC_LWADD_NA, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, BITFIELD(51, 3) /* index 941 */, - TILE_OPC_NONE, TILE_OPC_ADDIB_SN, TILE_OPC_ADDIH_SN, TILE_OPC_ADDI_SN, - TILE_OPC_ANDI_SN, TILE_OPC_MAXIB_U_SN, TILE_OPC_MAXIH_SN, TILE_OPC_MFSPR, + TILEPRO_OPC_NONE, TILEPRO_OPC_ADDIB_SN, TILEPRO_OPC_ADDIH_SN, + TILEPRO_OPC_ADDI_SN, TILEPRO_OPC_ANDI_SN, TILEPRO_OPC_MAXIB_U_SN, + TILEPRO_OPC_MAXIH_SN, TILEPRO_OPC_MFSPR, BITFIELD(51, 3) /* index 950 */, - TILE_OPC_MINIB_U_SN, TILE_OPC_MINIH_SN, TILE_OPC_MTSPR, CHILD(959), - TILE_OPC_SEQIB_SN, TILE_OPC_SEQIH_SN, TILE_OPC_SEQI_SN, TILE_OPC_SLTIB_SN, + TILEPRO_OPC_MINIB_U_SN, TILEPRO_OPC_MINIH_SN, TILEPRO_OPC_MTSPR, CHILD(959), + TILEPRO_OPC_SEQIB_SN, TILEPRO_OPC_SEQIH_SN, TILEPRO_OPC_SEQI_SN, + TILEPRO_OPC_SLTIB_SN, BITFIELD(37, 2) /* index 959 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(964), + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(964), BITFIELD(39, 2) /* index 964 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, CHILD(969), + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, CHILD(969), BITFIELD(41, 2) /* index 969 */, - TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_ORI_SN, TILE_OPC_MOVEI_SN, + TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, TILEPRO_OPC_ORI_SN, + TILEPRO_OPC_MOVEI_SN, BITFIELD(51, 3) /* index 974 */, - TILE_OPC_SLTIB_U_SN, TILE_OPC_SLTIH_SN, TILE_OPC_SLTIH_U_SN, - TILE_OPC_SLTI_SN, TILE_OPC_SLTI_U_SN, 
TILE_OPC_XORI_SN, TILE_OPC_LBADD_SN, - TILE_OPC_LBADD_U_SN, + TILEPRO_OPC_SLTIB_U_SN, TILEPRO_OPC_SLTIH_SN, TILEPRO_OPC_SLTIH_U_SN, + TILEPRO_OPC_SLTI_SN, TILEPRO_OPC_SLTI_U_SN, TILEPRO_OPC_XORI_SN, + TILEPRO_OPC_LBADD_SN, TILEPRO_OPC_LBADD_U_SN, BITFIELD(51, 3) /* index 983 */, - TILE_OPC_LHADD_SN, TILE_OPC_LHADD_U_SN, TILE_OPC_LWADD_SN, - TILE_OPC_LWADD_NA_SN, TILE_OPC_SBADD, TILE_OPC_SHADD, TILE_OPC_SWADD, - TILE_OPC_NONE, + TILEPRO_OPC_LHADD_SN, TILEPRO_OPC_LHADD_U_SN, TILEPRO_OPC_LWADD_SN, + TILEPRO_OPC_LWADD_NA_SN, TILEPRO_OPC_SBADD, TILEPRO_OPC_SHADD, + TILEPRO_OPC_SWADD, TILEPRO_OPC_NONE, BITFIELD(46, 7) /* index 992 */, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(1121), - CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124), CHILD(1124), - CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127), CHILD(1127), - CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130), CHILD(1130), - CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1136), - CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139), CHILD(1139), - CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142), CHILD(1142), - CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145), CHILD(1145), - CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1151), - CHILD(1242), CHILD(1290), CHILD(1323), TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, 
TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1121), CHILD(1124), + CHILD(1124), CHILD(1124), CHILD(1124), CHILD(1127), CHILD(1127), + CHILD(1127), CHILD(1127), CHILD(1130), CHILD(1130), CHILD(1130), + CHILD(1130), CHILD(1133), CHILD(1133), CHILD(1133), CHILD(1133), + CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1136), CHILD(1139), + CHILD(1139), CHILD(1139), CHILD(1139), CHILD(1142), CHILD(1142), + CHILD(1142), CHILD(1142), CHILD(1145), CHILD(1145), CHILD(1145), + CHILD(1145), CHILD(1148), CHILD(1148), CHILD(1148), CHILD(1148), + CHILD(1151), CHILD(1242), CHILD(1290), CHILD(1323), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, 
TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1121 */, - TILE_OPC_RLI, TILE_OPC_NONE, + TILEPRO_OPC_RLI, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1124 */, - TILE_OPC_SHLIB, TILE_OPC_NONE, + TILEPRO_OPC_SHLIB, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1127 */, - TILE_OPC_SHLIH, TILE_OPC_NONE, + TILEPRO_OPC_SHLIH, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1130 */, - TILE_OPC_SHLI, TILE_OPC_NONE, + TILEPRO_OPC_SHLI, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1133 */, - TILE_OPC_SHRIB, TILE_OPC_NONE, + TILEPRO_OPC_SHRIB, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1136 */, - TILE_OPC_SHRIH, TILE_OPC_NONE, + TILEPRO_OPC_SHRIH, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1139 */, - TILE_OPC_SHRI, TILE_OPC_NONE, + TILEPRO_OPC_SHRI, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1142 */, - TILE_OPC_SRAIB, TILE_OPC_NONE, + TILEPRO_OPC_SRAIB, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1145 */, - TILE_OPC_SRAIH, TILE_OPC_NONE, + TILEPRO_OPC_SRAIH, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1148 */, - TILE_OPC_SRAI, TILE_OPC_NONE, + TILEPRO_OPC_SRAI, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 1151 */, - TILE_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169), + TILEPRO_OPC_NONE, CHILD(1160), CHILD(1163), CHILD(1166), CHILD(1169), CHILD(1172), CHILD(1175), CHILD(1178), BITFIELD(53, 1) /* index 1160 */, - TILE_OPC_DRAIN, 
TILE_OPC_NONE, + TILEPRO_OPC_DRAIN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1163 */, - TILE_OPC_DTLBPR, TILE_OPC_NONE, + TILEPRO_OPC_DTLBPR, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1166 */, - TILE_OPC_FINV, TILE_OPC_NONE, + TILEPRO_OPC_FINV, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1169 */, - TILE_OPC_FLUSH, TILE_OPC_NONE, + TILEPRO_OPC_FLUSH, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1172 */, - TILE_OPC_FNOP, TILE_OPC_NONE, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1175 */, - TILE_OPC_ICOH, TILE_OPC_NONE, + TILEPRO_OPC_ICOH, TILEPRO_OPC_NONE, BITFIELD(31, 2) /* index 1178 */, CHILD(1183), CHILD(1211), CHILD(1239), CHILD(1239), BITFIELD(53, 1) /* index 1183 */, - CHILD(1186), TILE_OPC_NONE, + CHILD(1186), TILEPRO_OPC_NONE, BITFIELD(33, 2) /* index 1186 */, - TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_ILL, CHILD(1191), + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1191), BITFIELD(35, 2) /* index 1191 */, - TILE_OPC_ILL, CHILD(1196), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1196), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(37, 2) /* index 1196 */, - TILE_OPC_ILL, CHILD(1201), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1201), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(39, 2) /* index 1201 */, - TILE_OPC_ILL, CHILD(1206), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1206), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(41, 2) /* index 1206 */, - TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_BPT, TILE_OPC_ILL, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_BPT, TILEPRO_OPC_ILL, BITFIELD(53, 1) /* index 1211 */, - CHILD(1214), TILE_OPC_NONE, + CHILD(1214), TILEPRO_OPC_NONE, BITFIELD(33, 2) /* index 1214 */, - TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_ILL, CHILD(1219), + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, CHILD(1219), BITFIELD(35, 2) /* index 1219 */, - TILE_OPC_ILL, CHILD(1224), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1224), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(37, 2) /* index 1224 */, - 
TILE_OPC_ILL, CHILD(1229), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1229), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(39, 2) /* index 1229 */, - TILE_OPC_ILL, CHILD(1234), TILE_OPC_ILL, TILE_OPC_ILL, + TILEPRO_OPC_ILL, CHILD(1234), TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, BITFIELD(41, 2) /* index 1234 */, - TILE_OPC_ILL, TILE_OPC_ILL, TILE_OPC_RAISE, TILE_OPC_ILL, + TILEPRO_OPC_ILL, TILEPRO_OPC_ILL, TILEPRO_OPC_RAISE, TILEPRO_OPC_ILL, BITFIELD(53, 1) /* index 1239 */, - TILE_OPC_ILL, TILE_OPC_NONE, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 1242 */, CHILD(1251), CHILD(1254), CHILD(1257), CHILD(1275), CHILD(1278), CHILD(1281), CHILD(1284), CHILD(1287), BITFIELD(53, 1) /* index 1251 */, - TILE_OPC_INV, TILE_OPC_NONE, + TILEPRO_OPC_INV, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1254 */, - TILE_OPC_IRET, TILE_OPC_NONE, + TILEPRO_OPC_IRET, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1257 */, - CHILD(1260), TILE_OPC_NONE, + CHILD(1260), TILEPRO_OPC_NONE, BITFIELD(31, 2) /* index 1260 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(1265), + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1265), BITFIELD(33, 2) /* index 1265 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(1270), + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(1270), BITFIELD(35, 2) /* index 1270 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_PREFETCH, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, BITFIELD(53, 1) /* index 1275 */, - TILE_OPC_LB_U, TILE_OPC_NONE, + TILEPRO_OPC_LB_U, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1278 */, - TILE_OPC_LH, TILE_OPC_NONE, + TILEPRO_OPC_LH, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1281 */, - TILE_OPC_LH_U, TILE_OPC_NONE, + TILEPRO_OPC_LH_U, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1284 */, - TILE_OPC_LW, TILE_OPC_NONE, + TILEPRO_OPC_LW, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1287 */, - TILE_OPC_MF, TILE_OPC_NONE, + TILEPRO_OPC_MF, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 1290 */, 
CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), CHILD(1314), CHILD(1317), CHILD(1320), BITFIELD(53, 1) /* index 1299 */, - TILE_OPC_NAP, TILE_OPC_NONE, + TILEPRO_OPC_NAP, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1302 */, - TILE_OPC_NOP, TILE_OPC_NONE, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1305 */, - TILE_OPC_SWINT0, TILE_OPC_NONE, + TILEPRO_OPC_SWINT0, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1308 */, - TILE_OPC_SWINT1, TILE_OPC_NONE, + TILEPRO_OPC_SWINT1, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1311 */, - TILE_OPC_SWINT2, TILE_OPC_NONE, + TILEPRO_OPC_SWINT2, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1314 */, - TILE_OPC_SWINT3, TILE_OPC_NONE, + TILEPRO_OPC_SWINT3, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1317 */, - TILE_OPC_TNS, TILE_OPC_NONE, + TILEPRO_OPC_TNS, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1320 */, - TILE_OPC_WH64, TILE_OPC_NONE, + TILEPRO_OPC_WH64, TILEPRO_OPC_NONE, BITFIELD(43, 2) /* index 1323 */, - CHILD(1328), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(1328), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(45, 1) /* index 1328 */, - CHILD(1331), TILE_OPC_NONE, + CHILD(1331), TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1331 */, - TILE_OPC_LW_NA, TILE_OPC_NONE, + TILEPRO_OPC_LW_NA, TILEPRO_OPC_NONE, BITFIELD(46, 7) /* index 1334 */, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, CHILD(1463), - CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1466), CHILD(1466), - CHILD(1466), CHILD(1466), CHILD(1469), CHILD(1469), CHILD(1469), - CHILD(1469), CHILD(1472), CHILD(1472), CHILD(1472), CHILD(1472), - CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1478), - CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1481), CHILD(1481), - CHILD(1481), CHILD(1481), CHILD(1484), CHILD(1484), CHILD(1484), - CHILD(1484), CHILD(1487), CHILD(1487), CHILD(1487), CHILD(1487), - CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1151), - CHILD(1493), CHILD(1517), CHILD(1529), 
TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1463), CHILD(1466), + CHILD(1466), CHILD(1466), CHILD(1466), CHILD(1469), CHILD(1469), + CHILD(1469), CHILD(1469), CHILD(1472), CHILD(1472), CHILD(1472), + CHILD(1472), CHILD(1475), CHILD(1475), CHILD(1475), CHILD(1475), + CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1478), CHILD(1481), + CHILD(1481), CHILD(1481), CHILD(1481), CHILD(1484), CHILD(1484), + CHILD(1484), CHILD(1484), CHILD(1487), CHILD(1487), CHILD(1487), + CHILD(1487), CHILD(1490), CHILD(1490), CHILD(1490), CHILD(1490), + CHILD(1151), CHILD(1493), CHILD(1517), CHILD(1529), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + 
TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1463 */, - TILE_OPC_RLI_SN, TILE_OPC_NONE, + TILEPRO_OPC_RLI_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1466 */, - TILE_OPC_SHLIB_SN, TILE_OPC_NONE, + TILEPRO_OPC_SHLIB_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1469 */, - TILE_OPC_SHLIH_SN, TILE_OPC_NONE, + TILEPRO_OPC_SHLIH_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1472 */, - TILE_OPC_SHLI_SN, TILE_OPC_NONE, + TILEPRO_OPC_SHLI_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1475 */, - TILE_OPC_SHRIB_SN, TILE_OPC_NONE, + TILEPRO_OPC_SHRIB_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1478 */, - TILE_OPC_SHRIH_SN, 
TILE_OPC_NONE, + TILEPRO_OPC_SHRIH_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1481 */, - TILE_OPC_SHRI_SN, TILE_OPC_NONE, + TILEPRO_OPC_SHRI_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1484 */, - TILE_OPC_SRAIB_SN, TILE_OPC_NONE, + TILEPRO_OPC_SRAIB_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1487 */, - TILE_OPC_SRAIH_SN, TILE_OPC_NONE, + TILEPRO_OPC_SRAIH_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1490 */, - TILE_OPC_SRAI_SN, TILE_OPC_NONE, + TILEPRO_OPC_SRAI_SN, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 1493 */, CHILD(1251), CHILD(1254), CHILD(1502), CHILD(1505), CHILD(1508), CHILD(1511), CHILD(1514), CHILD(1287), BITFIELD(53, 1) /* index 1502 */, - TILE_OPC_LB_SN, TILE_OPC_NONE, + TILEPRO_OPC_LB_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1505 */, - TILE_OPC_LB_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_LB_U_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1508 */, - TILE_OPC_LH_SN, TILE_OPC_NONE, + TILEPRO_OPC_LH_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1511 */, - TILE_OPC_LH_U_SN, TILE_OPC_NONE, + TILEPRO_OPC_LH_U_SN, TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1514 */, - TILE_OPC_LW_SN, TILE_OPC_NONE, + TILEPRO_OPC_LW_SN, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 1517 */, CHILD(1299), CHILD(1302), CHILD(1305), CHILD(1308), CHILD(1311), CHILD(1314), CHILD(1526), CHILD(1320), BITFIELD(53, 1) /* index 1526 */, - TILE_OPC_TNS_SN, TILE_OPC_NONE, + TILEPRO_OPC_TNS_SN, TILEPRO_OPC_NONE, BITFIELD(43, 2) /* index 1529 */, - CHILD(1534), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + CHILD(1534), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(45, 1) /* index 1534 */, - CHILD(1537), TILE_OPC_NONE, + CHILD(1537), TILEPRO_OPC_NONE, BITFIELD(53, 1) /* index 1537 */, - TILE_OPC_LW_NA_SN, TILE_OPC_NONE, + TILEPRO_OPC_LW_NA_SN, TILEPRO_OPC_NONE, }; static const unsigned short decode_Y0_fsm[168] = { BITFIELD(27, 4) /* index 0 */, - TILE_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), - CHILD(57), CHILD(62), CHILD(67), 
TILE_OPC_ADDI, CHILD(72), CHILD(102), - TILE_OPC_SEQI, CHILD(117), TILE_OPC_SLTI, TILE_OPC_SLTI_U, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), CHILD(62), CHILD(67), TILEPRO_OPC_ADDI, CHILD(72), CHILD(102), + TILEPRO_OPC_SEQI, CHILD(117), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, BITFIELD(18, 2) /* index 17 */, - TILE_OPC_ADD, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_SUB, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, TILEPRO_OPC_SUB, BITFIELD(18, 2) /* index 22 */, - TILE_OPC_MNZ, TILE_OPC_MVNZ, TILE_OPC_MVZ, TILE_OPC_MZ, + TILEPRO_OPC_MNZ, TILEPRO_OPC_MVNZ, TILEPRO_OPC_MVZ, TILEPRO_OPC_MZ, BITFIELD(18, 2) /* index 27 */, - TILE_OPC_AND, TILE_OPC_NOR, CHILD(32), TILE_OPC_XOR, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, BITFIELD(12, 2) /* index 32 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(37), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), BITFIELD(14, 2) /* index 37 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(42), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), BITFIELD(16, 2) /* index 42 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, BITFIELD(18, 2) /* index 47 */, - TILE_OPC_RL, TILE_OPC_SHL, TILE_OPC_SHR, TILE_OPC_SRA, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, BITFIELD(18, 2) /* index 52 */, - TILE_OPC_SLTE, TILE_OPC_SLTE_U, TILE_OPC_SLT, TILE_OPC_SLT_U, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, BITFIELD(18, 2) /* index 57 */, - TILE_OPC_MULHLSA_UU, TILE_OPC_S3A, TILE_OPC_SEQ, TILE_OPC_SNE, + TILEPRO_OPC_MULHLSA_UU, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, BITFIELD(18, 2) /* index 62 */, - TILE_OPC_MULHH_SS, TILE_OPC_MULHH_UU, TILE_OPC_MULLL_SS, TILE_OPC_MULLL_UU, + TILEPRO_OPC_MULHH_SS, TILEPRO_OPC_MULHH_UU, TILEPRO_OPC_MULLL_SS, + TILEPRO_OPC_MULLL_UU, BITFIELD(18, 2) /* index 67 */, - TILE_OPC_MULHHA_SS, 
TILE_OPC_MULHHA_UU, TILE_OPC_MULLLA_SS, - TILE_OPC_MULLLA_UU, + TILEPRO_OPC_MULHHA_SS, TILEPRO_OPC_MULHHA_UU, TILEPRO_OPC_MULLLA_SS, + TILEPRO_OPC_MULLLA_UU, BITFIELD(0, 2) /* index 72 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(77), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), BITFIELD(2, 2) /* index 77 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(82), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), BITFIELD(4, 2) /* index 82 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(87), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), BITFIELD(6, 2) /* index 87 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(92), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(92), BITFIELD(8, 2) /* index 92 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(97), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(97), BITFIELD(10, 2) /* index 97 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, BITFIELD(6, 2) /* index 102 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(107), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(107), BITFIELD(8, 2) /* index 107 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(112), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(112), BITFIELD(10, 2) /* index 112 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, BITFIELD(15, 5) /* index 117 */, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_RLI, - TILE_OPC_RLI, TILE_OPC_RLI, TILE_OPC_RLI, TILE_OPC_SHLI, TILE_OPC_SHLI, - TILE_OPC_SHLI, TILE_OPC_SHLI, TILE_OPC_SHRI, TILE_OPC_SHRI, TILE_OPC_SHRI, - TILE_OPC_SHRI, TILE_OPC_SRAI, TILE_OPC_SRAI, TILE_OPC_SRAI, TILE_OPC_SRAI, - CHILD(150), CHILD(159), TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - 
TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, TILEPRO_OPC_RLI, + TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHLI, + TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, TILEPRO_OPC_SRAI, + CHILD(150), CHILD(159), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(12, 3) /* index 150 */, - TILE_OPC_NONE, TILE_OPC_BITX, TILE_OPC_BYTEX, TILE_OPC_CLZ, TILE_OPC_CTZ, - TILE_OPC_FNOP, TILE_OPC_NOP, TILE_OPC_PCNT, + TILEPRO_OPC_NONE, TILEPRO_OPC_BITX, TILEPRO_OPC_BYTEX, TILEPRO_OPC_CLZ, + TILEPRO_OPC_CTZ, TILEPRO_OPC_FNOP, TILEPRO_OPC_NOP, TILEPRO_OPC_PCNT, BITFIELD(12, 3) /* index 159 */, - TILE_OPC_TBLIDXB0, TILE_OPC_TBLIDXB1, TILE_OPC_TBLIDXB2, TILE_OPC_TBLIDXB3, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_TBLIDXB0, TILEPRO_OPC_TBLIDXB1, TILEPRO_OPC_TBLIDXB2, + TILEPRO_OPC_TBLIDXB3, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, }; static const unsigned short decode_Y1_fsm[140] = { BITFIELD(59, 4) /* index 0 */, - TILE_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), - CHILD(57), TILE_OPC_ADDI, CHILD(62), CHILD(92), TILE_OPC_SEQI, CHILD(107), - TILE_OPC_SLTI, TILE_OPC_SLTI_U, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, CHILD(17), CHILD(22), CHILD(27), CHILD(47), CHILD(52), + CHILD(57), TILEPRO_OPC_ADDI, CHILD(62), CHILD(92), TILEPRO_OPC_SEQI, + CHILD(107), TILEPRO_OPC_SLTI, TILEPRO_OPC_SLTI_U, TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, BITFIELD(49, 2) /* index 17 */, - TILE_OPC_ADD, TILE_OPC_S1A, TILE_OPC_S2A, TILE_OPC_SUB, + TILEPRO_OPC_ADD, TILEPRO_OPC_S1A, TILEPRO_OPC_S2A, 
TILEPRO_OPC_SUB, BITFIELD(49, 2) /* index 22 */, - TILE_OPC_NONE, TILE_OPC_MNZ, TILE_OPC_MZ, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_MNZ, TILEPRO_OPC_MZ, TILEPRO_OPC_NONE, BITFIELD(49, 2) /* index 27 */, - TILE_OPC_AND, TILE_OPC_NOR, CHILD(32), TILE_OPC_XOR, + TILEPRO_OPC_AND, TILEPRO_OPC_NOR, CHILD(32), TILEPRO_OPC_XOR, BITFIELD(43, 2) /* index 32 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(37), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(37), BITFIELD(45, 2) /* index 37 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, CHILD(42), + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, CHILD(42), BITFIELD(47, 2) /* index 42 */, - TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_OR, TILE_OPC_MOVE, + TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_OR, TILEPRO_OPC_MOVE, BITFIELD(49, 2) /* index 47 */, - TILE_OPC_RL, TILE_OPC_SHL, TILE_OPC_SHR, TILE_OPC_SRA, + TILEPRO_OPC_RL, TILEPRO_OPC_SHL, TILEPRO_OPC_SHR, TILEPRO_OPC_SRA, BITFIELD(49, 2) /* index 52 */, - TILE_OPC_SLTE, TILE_OPC_SLTE_U, TILE_OPC_SLT, TILE_OPC_SLT_U, + TILEPRO_OPC_SLTE, TILEPRO_OPC_SLTE_U, TILEPRO_OPC_SLT, TILEPRO_OPC_SLT_U, BITFIELD(49, 2) /* index 57 */, - TILE_OPC_NONE, TILE_OPC_S3A, TILE_OPC_SEQ, TILE_OPC_SNE, + TILEPRO_OPC_NONE, TILEPRO_OPC_S3A, TILEPRO_OPC_SEQ, TILEPRO_OPC_SNE, BITFIELD(31, 2) /* index 62 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(67), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(67), BITFIELD(33, 2) /* index 67 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(72), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(72), BITFIELD(35, 2) /* index 72 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(77), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(77), BITFIELD(37, 2) /* index 77 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(82), + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(82), BITFIELD(39, 2) /* index 82 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, CHILD(87), + 
TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, CHILD(87), BITFIELD(41, 2) /* index 87 */, - TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_ANDI, TILE_OPC_INFO, + TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_ANDI, TILEPRO_OPC_INFO, BITFIELD(37, 2) /* index 92 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(97), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(97), BITFIELD(39, 2) /* index 97 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, CHILD(102), + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, CHILD(102), BITFIELD(41, 2) /* index 102 */, - TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_ORI, TILE_OPC_MOVEI, + TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_ORI, TILEPRO_OPC_MOVEI, BITFIELD(48, 3) /* index 107 */, - TILE_OPC_NONE, TILE_OPC_RLI, TILE_OPC_SHLI, TILE_OPC_SHRI, TILE_OPC_SRAI, - CHILD(116), TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_RLI, TILEPRO_OPC_SHLI, TILEPRO_OPC_SHRI, + TILEPRO_OPC_SRAI, CHILD(116), TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(43, 3) /* index 116 */, - TILE_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILE_OPC_NONE, - TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NONE, CHILD(125), CHILD(130), CHILD(135), TILEPRO_OPC_NONE, + TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(46, 2) /* index 125 */, - TILE_OPC_FNOP, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_FNOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(46, 2) /* index 130 */, - TILE_OPC_ILL, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_ILL, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, BITFIELD(46, 2) /* index 135 */, - TILE_OPC_NOP, TILE_OPC_NONE, TILE_OPC_NONE, TILE_OPC_NONE, + TILEPRO_OPC_NOP, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, TILEPRO_OPC_NONE, }; static const unsigned short decode_Y2_fsm[24] = { BITFIELD(56, 3) /* index 0 */, - CHILD(9), TILE_OPC_LB_U, TILE_OPC_LH, TILE_OPC_LH_U, TILE_OPC_LW, - TILE_OPC_SB, TILE_OPC_SH, TILE_OPC_SW, + CHILD(9), TILEPRO_OPC_LB_U, 
TILEPRO_OPC_LH, TILEPRO_OPC_LH_U, + TILEPRO_OPC_LW, TILEPRO_OPC_SB, TILEPRO_OPC_SH, TILEPRO_OPC_SW, BITFIELD(20, 2) /* index 9 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(14), + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(14), BITFIELD(22, 2) /* index 14 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, CHILD(19), + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, CHILD(19), BITFIELD(24, 2) /* index 19 */, - TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_LB, TILE_OPC_PREFETCH, + TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_LB, TILEPRO_OPC_PREFETCH, }; #undef BITFIELD #undef CHILD const unsigned short * const -tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS] = +tilepro_bundle_decoder_fsms[TILEPRO_NUM_PIPELINE_ENCODINGS] = { decode_X0_fsm, decode_X1_fsm, @@ -2191,220 +2297,220 @@ tile_bundle_decoder_fsms[TILE_NUM_PIPELINE_ENCODINGS] = decode_Y1_fsm, decode_Y2_fsm }; -const struct tile_operand tile_operands[43] = +const struct tilepro_operand tilepro_operands[43] = { { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM8_X0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X0), 8, 1, 0, 0, 0, 0, create_Imm8_X0, get_Imm8_X0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM8_X1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_X1), 8, 1, 0, 0, 0, 0, create_Imm8_X1, get_Imm8_X1 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM8_Y0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y0), 8, 1, 0, 0, 0, 0, create_Imm8_Y0, get_Imm8_Y0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM8_Y1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM8_Y1), 8, 1, 0, 0, 0, 0, create_Imm8_Y1, get_Imm8_Y1 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM16_X0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X0), 16, 1, 0, 0, 0, 0, create_Imm16_X0, get_Imm16_X0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_IMM16_X1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_IMM16_X1), 16, 1, 0, 0, 0, 0, create_Imm16_X1, get_Imm16_X1 }, { - TILE_OP_TYPE_ADDRESS, 
BFD_RELOC(TILE_JOFFLONG_X1), - 29, 1, 0, 0, 1, TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_JOFFLONG_X1), + 29, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, create_JOffLong_X1, get_JOffLong_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 0, 1, 0, 0, create_Dest_X0, get_Dest_X0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcA_X0, get_SrcA_X0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 0, 1, 0, 0, create_Dest_X1, get_Dest_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcA_X1, get_SrcA_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 0, 1, 0, 0, create_Dest_Y0, get_Dest_Y0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcA_Y0, get_SrcA_Y0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 0, 1, 0, 0, create_Dest_Y1, get_Dest_Y1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcA_Y1, get_SrcA_Y1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcA_Y2, get_SrcA_Y2 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcB_X0, get_SrcB_X0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcB_X1, get_SrcB_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcB_Y0, get_SrcB_Y0 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 
0, create_SrcB_Y1, get_SrcB_Y1 }, { - TILE_OP_TYPE_ADDRESS, BFD_RELOC(TILE_BROFF_X1), - 17, 1, 0, 0, 1, TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(TILEPRO_BROFF_X1), + 17, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, create_BrOff_X1, get_BrOff_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 1, 0, 0, create_Dest_X0, get_Dest_X0 }, { - TILE_OP_TYPE_ADDRESS, BFD_RELOC(NONE), - 28, 1, 0, 0, 1, TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 28, 1, 0, 0, 1, TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, create_JOff_X1, get_JOff_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 0, 1, 0, 0, create_SrcBDest_Y2, get_SrcBDest_Y2 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 1, 0, 0, create_SrcA_X1, get_SrcA_X1 }, { - TILE_OP_TYPE_SPR, BFD_RELOC(TILE_MF_IMM15_X1), + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MF_IMM15_X1), 15, 0, 0, 0, 0, 0, create_MF_Imm15_X1, get_MF_Imm15_X1 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_MMSTART_X0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X0), 5, 0, 0, 0, 0, 0, create_MMStart_X0, get_MMStart_X0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_MMEND_X0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X0), 5, 0, 0, 0, 0, 0, create_MMEnd_X0, get_MMEnd_X0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_MMSTART_X1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMSTART_X1), 5, 0, 0, 0, 0, 0, create_MMStart_X1, get_MMStart_X1 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_MMEND_X1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_MMEND_X1), 5, 0, 0, 0, 0, 0, create_MMEnd_X1, get_MMEnd_X1 }, { - TILE_OP_TYPE_SPR, BFD_RELOC(TILE_MT_IMM15_X1), + TILEPRO_OP_TYPE_SPR, BFD_RELOC(TILEPRO_MT_IMM15_X1), 15, 0, 0, 0, 0, 0, create_MT_Imm15_X1, get_MT_Imm15_X1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 
TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 1, 0, 0, create_Dest_Y0, get_Dest_Y0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SHAMT_X0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X0), 5, 0, 0, 0, 0, 0, create_ShAmt_X0, get_ShAmt_X0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SHAMT_X1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_X1), 5, 0, 0, 0, 0, 0, create_ShAmt_X1, get_ShAmt_X1 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SHAMT_Y0), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y0), 5, 0, 0, 0, 0, 0, create_ShAmt_Y0, get_ShAmt_Y0 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SHAMT_Y1), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_SHAMT_Y1), 5, 0, 0, 0, 0, 0, create_ShAmt_Y1, get_ShAmt_Y1 }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 6, 0, 1, 0, 0, 0, create_SrcBDest_Y2, get_SrcBDest_Y2 }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEPRO_DEST_IMM8_X1), 8, 1, 0, 0, 0, 0, create_Dest_Imm8_X1, get_Dest_Imm8_X1 }, { - TILE_OP_TYPE_ADDRESS, BFD_RELOC(TILE_SN_BROFF), - 10, 1, 0, 0, 1, TILE_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES, + TILEPRO_OP_TYPE_ADDRESS, BFD_RELOC(NONE), + 10, 1, 0, 0, 1, TILEPRO_LOG2_SN_INSTRUCTION_SIZE_IN_BYTES, create_BrOff_SN, get_BrOff_SN }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SN_UIMM8), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), 8, 0, 0, 0, 0, 0, create_Imm8_SN, get_Imm8_SN }, { - TILE_OP_TYPE_IMMEDIATE, BFD_RELOC(TILE_SN_IMM8), + TILEPRO_OP_TYPE_IMMEDIATE, BFD_RELOC(NONE), 8, 1, 0, 0, 0, 0, create_Imm8_SN, get_Imm8_SN }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 2, 0, 0, 1, 0, 0, create_Dest_SN, get_Dest_SN }, { - TILE_OP_TYPE_REGISTER, BFD_RELOC(NONE), + TILEPRO_OP_TYPE_REGISTER, BFD_RELOC(NONE), 2, 0, 1, 0, 0, 0, create_Src_SN, get_Src_SN } @@ -2413,12 +2519,13 @@ const struct tile_operand tile_operands[43] = -/* Given a set of bundle bits and the lookup FSM for a 
specific pipe, - * returns which instruction the bundle contains in that pipe. +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. */ -static const struct tile_opcode * -find_opcode(tile_bundle_bits bits, const unsigned short *table) +const struct tilepro_opcode * +find_opcode(tilepro_bundle_bits bits, tilepro_pipeline pipe) { + const unsigned short *table = tilepro_bundle_decoder_fsms[pipe]; int index = 0; while (1) @@ -2428,51 +2535,51 @@ find_opcode(tile_bundle_bits bits, const unsigned short *table) ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); unsigned short next = table[index + 1 + bitfield]; - if (next <= TILE_OPC_NONE) - return &tile_opcodes[next]; + if (next <= TILEPRO_OPC_NONE) + return &tilepro_opcodes[next]; - index = next - TILE_OPC_NONE; + index = next - TILEPRO_OPC_NONE; } } int -parse_insn_tile(tile_bundle_bits bits, - unsigned int pc, - struct tile_decoded_instruction - decoded[TILE_MAX_INSTRUCTIONS_PER_BUNDLE]) +parse_insn_tilepro(tilepro_bundle_bits bits, + unsigned int pc, + struct tilepro_decoded_instruction + decoded[TILEPRO_MAX_INSTRUCTIONS_PER_BUNDLE]) { int num_instructions = 0; int pipe; int min_pipe, max_pipe; - if ((bits & TILE_BUNDLE_Y_ENCODING_MASK) == 0) + if ((bits & TILEPRO_BUNDLE_Y_ENCODING_MASK) == 0) { - min_pipe = TILE_PIPELINE_X0; - max_pipe = TILE_PIPELINE_X1; + min_pipe = TILEPRO_PIPELINE_X0; + max_pipe = TILEPRO_PIPELINE_X1; } else { - min_pipe = TILE_PIPELINE_Y0; - max_pipe = TILE_PIPELINE_Y2; + min_pipe = TILEPRO_PIPELINE_Y0; + max_pipe = TILEPRO_PIPELINE_Y2; } /* For each pipe, find an instruction that fits. 
*/ for (pipe = min_pipe; pipe <= max_pipe; pipe++) { - const struct tile_opcode *opc; - struct tile_decoded_instruction *d; + const struct tilepro_opcode *opc; + struct tilepro_decoded_instruction *d; int i; d = &decoded[num_instructions++]; - opc = find_opcode (bits, tile_bundle_decoder_fsms[pipe]); + opc = find_opcode (bits, (tilepro_pipeline)pipe); d->opcode = opc; /* Decode each operand, sign extending, etc. as appropriate. */ for (i = 0; i < opc->num_operands; i++) { - const struct tile_operand *op = - &tile_operands[opc->operands[pipe][i]]; + const struct tilepro_operand *op = + &tilepro_operands[opc->operands[pipe][i]]; int opval = op->extract (bits); if (op->is_signed) { @@ -2482,9 +2589,9 @@ parse_insn_tile(tile_bundle_bits bits, } /* Adjust PC-relative scaled branch offsets. */ - if (op->type == TILE_OP_TYPE_ADDRESS) + if (op->type == TILEPRO_OP_TYPE_ADDRESS) { - opval *= TILE_BUNDLE_SIZE_IN_BYTES; + opval *= TILEPRO_BUNDLE_SIZE_IN_BYTES; opval += (int)pc; } diff --git a/arch/tile/kernel/tile-desc_64.c b/arch/tile/kernel/tile-desc_64.c new file mode 100644 index 00000000000..65b5f8aca70 --- /dev/null +++ b/arch/tile/kernel/tile-desc_64.c @@ -0,0 +1,2218 @@ +/* TILE-Gx opcode information. + * + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * + * + * + * + */ + +/* This define is BFD_RELOC_##x for real bfd, or -1 for everyone else. */ +#define BFD_RELOC(x) -1 + +/* Special registers. 
*/ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +#include <linux/stddef.h> +#include <asm/tile-desc.h> + +const struct tilegx_opcode tilegx_opcodes[334] = +{ + { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, + }, + { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 6, 7 }, { 8, 9 }, { 10, 11 }, { 12, 13 }, { 0, } }, + }, + { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 6, 0 }, { 8, 1 }, { 10, 2 }, { 12, 3 }, { 0, } }, + }, + { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 6, 4 }, { 8, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2", 
TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 14 } }, + }, + { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 0, }, { 0, } }, + }, + { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 
0, } }, + }, + { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1, + { { 6, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 20 }, { 0, }, { 0, }, { 0, } }, + }, + { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, 
TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, + }, + { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 
16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, 
{ 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, 
{ 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, + }, + { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, + }, + { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 13 }, { 0, } }, + }, + { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { 
"ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 26, 14 } }, + }, + { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, 
} }, + }, + { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 8, 15, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 8 }, { 0, }, { 12 }, { 0, } }, + }, + { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 8, 27 }, { 0, }, { 0, }, { 0, } }, + }, + { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1, + { { 23, 7, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 28, 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 6, 
7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } 
}, + }, + { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, + }, + { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, + }, + { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, + }, + { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1, + { { 6, 7 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, + }, + { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 4 }, { 8, 9, 5 }, { 0, }, { 
0, }, { 0, } }, + }, + { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, + }, + { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "shufflebytes", 
TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 14, 33 } }, + }, + { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, + }, + { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 
12, 13, 19 }, { 0, } }, + }, + { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, + }, + { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 23, 7 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, + }, + { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmples", 
TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + 
}, + { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shru", 
TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 
0, } }, + }, + { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } 
}, + }, + { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 29 }, { 8, 9, 30 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, 
+ }, + { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 0, }, { 0, }, { 0, } }, + }, + { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 9 }, { 0, }, { 0, }, { 0, } }, + }, + { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 6, 7, 16 }, { 8, 9, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, + }, + { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 6, 7, 0 }, { 8, 9, 1 }, { 0, }, { 0, }, { 0, } }, + }, + { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, + } +}; +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[936] = +{ + BITFIELD(22, 9) /* index 0 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, 
TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, 
TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS, + TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM, + TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578), + CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, 
TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671), + CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + 
TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + BITFIELD(6, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(8, 2) /* index 518 */, + 
TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(10, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(20, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(6, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(8, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(10, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(2, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(4, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(6, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(8, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(10, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(20, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI, + BITFIELD(20, 2) /* index 583 */, + TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTUI, + BITFIELD(20, 2) /* index 588 */, + TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2CMPEQI, + BITFIELD(20, 2) /* index 593 */, + TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINSI, + BITFIELD(20, 2) /* index 598 */, + TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 603 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, 
TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR, + BITFIELD(18, 4) /* index 620 */, + TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL, + TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN, + TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS, + BITFIELD(18, 4) /* index 637 */, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS, + BITFIELD(18, 4) /* index 654 */, + TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MZ, + BITFIELD(18, 4) /* index 671 */, + TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_SUBXSC, + BITFIELD(12, 2) /* index 688 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693), + BITFIELD(14, 2) /* index 693 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698), + BITFIELD(16, 2) /* index 698 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 4) /* index 703 
*/, + TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA, + BITFIELD(12, 4) /* index 720 */, + TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757), + CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787), + CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 737 */, + TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 742 */, + TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 747 */, + TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 752 */, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 757 */, + TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 762 */, + TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 767 */, + TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 772 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 777 */, + TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 782 */, + TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 787 */, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 792 */, + TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 797 */, + TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1INT_H, 
TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, + BITFIELD(18, 4) /* index 814 */, + TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H, + BITFIELD(18, 4) /* index 831 */, + TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC, + BITFIELD(18, 4) /* index 848 */, + TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, + BITFIELD(18, 3) /* index 865 */, + CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 874 */, + TILEGX_OPC_XOR, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 877 */, + TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 880 */, + TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 883 */, + TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 886 */, + TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 889 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, 
TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(0, 2) /* index 906 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(911), + BITFIELD(2, 2) /* index 911 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(916), + BITFIELD(4, 2) /* index 916 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(921), + BITFIELD(6, 2) /* index 921 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(926), + BITFIELD(8, 2) /* index 926 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(931), + BITFIELD(10, 2) /* index 931 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_X1_fsm[1206] = +{ + BITFIELD(53, 9) /* index 0 */, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, 
TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT, + TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT, + TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT, + TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST, + TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT, + TILEGX_OPC_BLEZT, 
TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT, + TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT, + TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578), + CHILD(598), CHILD(663), CHILD(683), CHILD(688), CHILD(693), CHILD(698), + CHILD(703), CHILD(708), CHILD(713), CHILD(718), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, 
TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + CHILD(723), CHILD(740), CHILD(772), CHILD(789), CHILD(1108), CHILD(1125), + CHILD(1142), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1159), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, 
TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), CHILD(1176), + CHILD(1176), + BITFIELD(37, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(39, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(41, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(51, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(37, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(39, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(41, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, 
TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(33, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(35, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(37, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(39, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(41, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(51, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583), + BITFIELD(31, 2) /* index 583 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588), + BITFIELD(33, 2) /* index 588 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593), + BITFIELD(35, 2) /* index 593 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + BITFIELD(51, 2) /* index 598 */, + CHILD(603), CHILD(618), CHILD(633), CHILD(648), + BITFIELD(31, 2) /* index 603 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608), + BITFIELD(33, 2) /* index 608 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613), + BITFIELD(35, 2) /* index 613 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_PREFETCH_ADD_L1, + BITFIELD(31, 2) /* index 618 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623), + BITFIELD(33, 2) /* index 623 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628), + BITFIELD(35, 2) /* index 628 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + BITFIELD(31, 2) /* index 633 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, 
CHILD(638), + BITFIELD(33, 2) /* index 638 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643), + BITFIELD(35, 2) /* index 643 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_PREFETCH_ADD_L2, + BITFIELD(31, 2) /* index 648 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(653), + BITFIELD(33, 2) /* index 653 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, CHILD(658), + BITFIELD(35, 2) /* index 658 */, + TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(51, 2) /* index 663 */, + CHILD(668), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S_ADD, + BITFIELD(31, 2) /* index 668 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(673), + BITFIELD(33, 2) /* index 673 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(678), + BITFIELD(35, 2) /* index 678 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_PREFETCH_ADD_L3, + BITFIELD(51, 2) /* index 683 */, + TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + BITFIELD(51, 2) /* index 688 */, + TILEGX_OPC_LD_ADD, TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR, + BITFIELD(51, 2) /* index 693 */, + TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD, + BITFIELD(51, 2) /* index 698 */, + TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + BITFIELD(51, 2) /* index 703 */, + TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLTSI, + BITFIELD(51, 2) /* index 708 */, + TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, + TILEGX_OPC_V2ADDI, + BITFIELD(51, 2) /* index 713 */, + TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2MAXSI, + BITFIELD(51, 2) /* index 718 */, + TILEGX_OPC_V2MINSI, 
TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 723 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + BITFIELD(49, 4) /* index 740 */, + TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR, + CHILD(757), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + BITFIELD(43, 2) /* index 757 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(762), + BITFIELD(45, 2) /* index 762 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(767), + BITFIELD(47, 2) /* index 767 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 4) /* index 772 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1, + TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2, + TILEGX_OPC_STNT4, + BITFIELD(46, 7) /* index 789 */, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + 
TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(918), CHILD(927), + CHILD(1006), CHILD(1090), CHILD(1099), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + BITFIELD(43, 3) /* index 918 */, + TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, 
TILEGX_OPC_FINV, + TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH, + BITFIELD(43, 3) /* index 927 */, + CHILD(936), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP, + TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(991), + BITFIELD(31, 2) /* index 936 */, + CHILD(941), CHILD(966), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 941 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(946), + BITFIELD(35, 2) /* index 946 */, + TILEGX_OPC_ILL, CHILD(951), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 951 */, + TILEGX_OPC_ILL, CHILD(956), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 956 */, + TILEGX_OPC_ILL, CHILD(961), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 961 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 966 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(971), + BITFIELD(35, 2) /* index 971 */, + TILEGX_OPC_ILL, CHILD(976), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 976 */, + TILEGX_OPC_ILL, CHILD(981), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 981 */, + TILEGX_OPC_ILL, CHILD(986), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 986 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL, + BITFIELD(31, 2) /* index 991 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(996), + BITFIELD(33, 2) /* index 996 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1001), + BITFIELD(35, 2) /* index 1001 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(43, 3) /* index 1006 */, + CHILD(1015), CHILD(1030), CHILD(1045), CHILD(1060), CHILD(1075), + TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U, + BITFIELD(31, 2) /* index 1015 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1020), + BITFIELD(33, 2) /* index 1020 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, 
CHILD(1025), + BITFIELD(35, 2) /* index 1025 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(31, 2) /* index 1030 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1035), + BITFIELD(33, 2) /* index 1035 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1040), + BITFIELD(35, 2) /* index 1040 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(31, 2) /* index 1045 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1050), + BITFIELD(33, 2) /* index 1050 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1055), + BITFIELD(35, 2) /* index 1055 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(31, 2) /* index 1060 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1065), + BITFIELD(33, 2) /* index 1065 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1070), + BITFIELD(35, 2) /* index 1070 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, + TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(31, 2) /* index 1075 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1080), + BITFIELD(33, 2) /* index 1080 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1085), + BITFIELD(35, 2) /* index 1085 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(43, 3) /* index 1090 */, + TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF, + BITFIELD(43, 3) /* index 1099 */, + TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1108 */, + TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, 
TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTU, + BITFIELD(49, 4) /* index 1125 */, + TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB, + BITFIELD(49, 4) /* index 1142 */, + TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1159 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(31, 2) /* index 1176 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1181), + BITFIELD(33, 2) /* index 1181 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1186), + BITFIELD(35, 2) /* index 1186 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1191), + BITFIELD(37, 2) /* index 1191 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1196), + BITFIELD(39, 2) /* index 1196 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1201), + BITFIELD(41, 2) /* index 1201 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_Y0_fsm[178] = +{ + BITFIELD(27, 4) /* index 0 */, + 
CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123), + CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168), + CHILD(173), + BITFIELD(6, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(8, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(10, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(2, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(4, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(6, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(8, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(10, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(18, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(15, 5) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100), + CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(12, 3) /* index 100 */, + TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_PACK1, 
TILEGX_OPC_NOP, TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + BITFIELD(12, 3) /* index 109 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(18, 2) /* index 118 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(18, 2) /* index 123 */, + TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX, + BITFIELD(18, 2) /* index 128 */, + TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(18, 2) /* index 133 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR, + BITFIELD(12, 2) /* index 138 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143), + BITFIELD(14, 2) /* index 143 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148), + BITFIELD(16, 2) /* index 148 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 2) /* index 153 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(18, 2) /* index 158 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(18, 2) /* index 163 */, + TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LU_LU, + BITFIELD(18, 2) /* index 168 */, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LU_LU, + BITFIELD(18, 2) /* index 173 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y1_fsm[167] = +{ + BITFIELD(58, 4) /* index 0 */, + TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122), + CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE, + BITFIELD(37, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(39, 2) /* index 22 
*/, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(41, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(33, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(35, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(37, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(39, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(41, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(49, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(47, 4) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(43, 3) /* index 84 */, + CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108), + CHILD(111), CHILD(114), + BITFIELD(46, 1) /* index 93 */, + TILEGX_OPC_NONE, TILEGX_OPC_FNOP, + BITFIELD(46, 1) /* index 96 */, + TILEGX_OPC_NONE, TILEGX_OPC_ILL, + BITFIELD(46, 1) /* index 99 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALRP, + BITFIELD(46, 1) /* index 102 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALR, + BITFIELD(46, 1) /* index 105 */, + TILEGX_OPC_NONE, TILEGX_OPC_JRP, + BITFIELD(46, 1) /* index 108 */, + TILEGX_OPC_NONE, TILEGX_OPC_JR, + BITFIELD(46, 1) /* index 111 */, + TILEGX_OPC_NONE, TILEGX_OPC_LNK, + BITFIELD(46, 1) /* index 114 */, + TILEGX_OPC_NONE, TILEGX_OPC_NOP, + BITFIELD(49, 2) /* index 117 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, 
TILEGX_OPC_CMPLTU, + BITFIELD(49, 2) /* index 122 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, + BITFIELD(49, 2) /* index 127 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(49, 2) /* index 132 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR, + BITFIELD(43, 2) /* index 137 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142), + BITFIELD(45, 2) /* index 142 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147), + BITFIELD(47, 2) /* index 147 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 2) /* index 152 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(49, 2) /* index 157 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(49, 2) /* index 162 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y2_fsm[118] = +{ + BITFIELD(62, 2) /* index 0 */, + TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109), + BITFIELD(55, 3) /* index 5 */, + CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40), + CHILD(43), + BITFIELD(26, 1) /* index 14 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1U, + BITFIELD(26, 1) /* index 17 */, + CHILD(20), CHILD(30), + BITFIELD(51, 2) /* index 20 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25), + BITFIELD(53, 2) /* index 25 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(51, 2) /* index 30 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35), + BITFIELD(53, 2) /* index 35 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(26, 1) /* index 40 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2U, + BITFIELD(26, 1) /* index 43 */, + CHILD(46), CHILD(56), + BITFIELD(51, 2) /* index 46 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51), + 
BITFIELD(53, 2) /* index 51 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(51, 2) /* index 56 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61), + BITFIELD(53, 2) /* index 61 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(56, 2) /* index 66 */, + CHILD(71), CHILD(74), CHILD(90), CHILD(93), + BITFIELD(26, 1) /* index 71 */, + TILEGX_OPC_NONE, TILEGX_OPC_LD4S, + BITFIELD(26, 1) /* index 74 */, + TILEGX_OPC_NONE, CHILD(77), + BITFIELD(51, 2) /* index 77 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82), + BITFIELD(53, 2) /* index 82 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87), + BITFIELD(55, 1) /* index 87 */, + TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(26, 1) /* index 90 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD, + BITFIELD(26, 1) /* index 93 */, + CHILD(96), TILEGX_OPC_LD, + BITFIELD(51, 2) /* index 96 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101), + BITFIELD(53, 2) /* index 101 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106), + BITFIELD(55, 1) /* index 106 */, + TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(26, 1) /* index 109 */, + CHILD(112), CHILD(115), + BITFIELD(57, 1) /* index 112 */, + TILEGX_OPC_ST1, TILEGX_OPC_ST4, + BITFIELD(57, 1) /* index 115 */, + TILEGX_OPC_ST2, TILEGX_OPC_ST, +}; + +#undef BITFIELD +#undef CHILD +const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; +const struct tilegx_operand tilegx_operands[35] = +{ + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0), + 8, 1, 0, 0, 
0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1), + 17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + 
}, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0), + 6, 0, 0, 0, 0, 0, + create_BFStart_X0, get_BFStart_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0), + 6, 0, 0, 0, 0, 0, + create_BFEnd_X0, get_BFEnd_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1), + 27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JumpOff_X1, get_JumpOff_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MF_Imm14_X1, get_MF_Imm14_X1 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MT_Imm14_X1, get_MT_Imm14_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + } +}; + + + + +/* Given a set of bundle bits and the lookup FSM for a specific pipe, + * returns which instruction the bundle contains in that pipe. 
+ */ +static const struct tilegx_opcode * +find_opcode(tilegx_bundle_bits bits, const unsigned short *table) +{ + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEGX_OPC_NONE) + return &tilegx_opcodes[next]; + + index = next - TILEGX_OPC_NONE; + } +} + + +int +parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0) + { + min_pipe = TILEGX_PIPELINE_X0; + max_pipe = TILEGX_PIPELINE_X1; + } + else + { + min_pipe = TILEGX_PIPELINE_Y0; + max_pipe = TILEGX_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilegx_opcode *opc; + struct tilegx_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, tilegx_bundle_decoder_fsms[pipe]); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilegx_operand *op = + &tilegx_operands[opc->operands[pipe][i]]; + int raw_opval = op->extract (bits); + long long opval; + + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + raw_opval = (raw_opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEGX_OP_TYPE_ADDRESS) + opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc; + else + opval = raw_opval; + + /* Record the final value. 
*/ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 49a605be94c..462dcd0c170 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -22,8 +22,11 @@ #include <linux/sched.h> #include <linux/smp.h> #include <linux/delay.h> +#include <linux/module.h> +#include <linux/timekeeper_internal.h> #include <asm/irq_regs.h> #include <asm/traps.h> +#include <asm/vdso.h> #include <hv/hypervisor.h> #include <arch/interrupts.h> #include <arch/spr_def.h> @@ -56,6 +59,7 @@ cycles_t get_cycles(void) return (((cycles_t)high) << 32) | low; } +EXPORT_SYMBOL(get_cycles); #endif /* @@ -76,7 +80,6 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -89,8 +92,6 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); - cycle_counter_cs.mult = - clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -105,13 +106,12 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. */ - clocksource_register(&cycle_counter_cs); + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); /* Start up the tile-timer interrupt source on the boot cpu. */ setup_tile_timer(); } - /* * Define the tile timer clock event device. 
The timer is driven by * the TILE_TIMER_CONTROL register, which consists of a 31-bit down @@ -160,7 +160,7 @@ static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = { .set_mode = tile_timer_set_mode, }; -void __cpuinit setup_tile_timer(void) +void setup_tile_timer(void) { struct clock_event_device *evt = &__get_cpu_var(tile_timer); @@ -231,6 +231,52 @@ int setup_profiling_timer(unsigned int multiplier) */ cycles_t ns2cycles(unsigned long nsecs) { - struct clock_event_device *dev = &__get_cpu_var(tile_timer); - return ((u64)nsecs * dev->mult) >> dev->shift; + /* + * We do not have to disable preemption here as each core has the same + * clock frequency. + */ + struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); + + /* + * as in clocksource.h and x86's timer.h, we split the calculation + * into 2 parts to avoid unnecessary overflow of the intermediate + * value. This will not lead to any loss of precision. + */ + u64 quot = (u64)nsecs >> dev->shift; + u64 rem = (u64)nsecs & ((1ULL << dev->shift) - 1); + return quot * dev->mult + ((rem * dev->mult) >> dev->shift); +} + +void update_vsyscall_tz(void) +{ + /* Userspace gettimeofday will spin while this value is odd. */ + ++vdso_data->tz_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tz_update_count; +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct timespec wall_time = tk_xtime(tk); + struct timespec *wtm = &tk->wall_to_monotonic; + struct clocksource *clock = tk->clock; + + if (clock != &cycle_counter_cs) + return; + + /* Userspace gettimeofday will spin while this value is odd.
*/ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_clock_sec = wall_time.tv_sec; + vdso_data->xtime_clock_nsec = wall_time.tv_nsec; + vdso_data->wtom_clock_sec = wtm->tv_sec; + vdso_data->wtom_clock_nsec = wtm->tv_nsec; + vdso_data->mult = clock->mult; + vdso_data->shift = clock->shift; + smp_wmb(); + ++vdso_data->tb_update_count; } diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c index 2dffc1044d8..f23b5351567 100644 --- a/arch/tile/kernel/tlb.c +++ b/arch/tile/kernel/tlb.c @@ -15,6 +15,7 @@ #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/homecache.h> #include <hv/hypervisor.h> @@ -34,13 +35,13 @@ void flush_tlb_mm(struct mm_struct *mm) { HV_Remote_ASID asids[NR_CPUS]; int i = 0, cpu; - for_each_cpu(cpu, &mm->cpu_vm_mask) { + for_each_cpu(cpu, mm_cpumask(mm)) { HV_Remote_ASID *asid = &asids[i++]; asid->y = cpu / smp_topology.width; asid->x = cpu % smp_topology.width; asid->asid = per_cpu(current_asid, cpu); } - flush_remote(0, HV_FLUSH_EVICT_L1I, &mm->cpu_vm_mask, + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(mm), 0, 0, 0, NULL, asids, i); } @@ -49,29 +50,29 @@ void flush_tlb_current_task(void) flush_tlb_mm(current->mm); } -void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, +void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long va) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); int cache = (vma->vm_flags & VM_EXEC) ? 
HV_FLUSH_EVICT_L1I : 0; - flush_remote(0, cache, &mm->cpu_vm_mask, - va, size, size, &mm->cpu_vm_mask, NULL, 0); + flush_remote(0, cache, mm_cpumask(mm), + va, size, size, mm_cpumask(mm), NULL, 0); } -void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { flush_tlb_page_mm(vma, vma->vm_mm, va); } EXPORT_SYMBOL(flush_tlb_page); -void flush_tlb_range(const struct vm_area_struct *vma, +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); struct mm_struct *mm = vma->vm_mm; int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; - flush_remote(0, cache, &mm->cpu_vm_mask, start, end - start, size, - &mm->cpu_vm_mask, NULL, 0); + flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, + mm_cpumask(mm), NULL, 0); } void flush_tlb_all(void) @@ -90,8 +91,14 @@ void flush_tlb_all(void) } } +/* + * Callers need to flush the L1I themselves if necessary, e.g. for + * kernel module unload. Otherwise we assume callers are not using + * executable pgprot_t's. Using EVICT_L1I means that dataplane cpus + * will get an unnecessary interrupt otherwise. 
+ */ void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask, + flush_remote(0, 0, NULL, start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0); } diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 5474fc2e77e..f3ceb6308e4 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -15,21 +15,22 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/kprobes.h> +#include <linux/kdebug.h> #include <linux/module.h> #include <linux/reboot.h> #include <linux/uaccess.h> #include <linux/ptrace.h> -#include <asm/opcode-tile.h> -#include <asm/opcode_constants.h> #include <asm/stack.h> #include <asm/traps.h> +#include <asm/setup.h> #include <arch/interrupts.h> #include <arch/spr_def.h> +#include <arch/opcode.h> void __init trap_init(void) { - /* Nothing needed here since we link code at .intrpt1 */ + /* Nothing needed here since we link code at .intrpt */ } int unaligned_fixup = 1; @@ -41,10 +42,9 @@ static int __init setup_unaligned_fixup(char *str) * will still parse the instruction, then fire a SIGBUS with * the correct address from inside the single_step code. */ - long val; - if (strict_strtol(str, 0, &val) != 0) + if (kstrtoint(str, 0, &unaligned_fixup) != 0) return 0; - unaligned_fixup = val; + pr_info("Fixups for unaligned data accesses are %s\n", unaligned_fixup >= 0 ? (unaligned_fixup ? 
"enabled" : "disabled") : @@ -100,13 +100,7 @@ static int retry_gpv(unsigned int gpv_reason) #endif /* CHIP_HAS_TILE_DMA() */ -#ifdef __tilegx__ -#define bundle_bits tilegx_bundle_bits -#else -#define bundle_bits tile_bundle_bits -#endif - -extern bundle_bits bpt_code; +extern tile_bundle_bits bpt_code; asm(".pushsection .rodata.bpt_code,\"a\";" ".align 8;" @@ -114,7 +108,7 @@ asm(".pushsection .rodata.bpt_code,\"a\";" ".size bpt_code,.-bpt_code;" ".popsection"); -static int special_ill(bundle_bits bundle, int *sigp, int *codep) +static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep) { int sig, code, maxcode; @@ -135,7 +129,7 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1) return 0; #else - if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) + if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) return 0; if (get_Opcode_X1(bundle) != SHUN_0_OPCODE_X1) return 0; @@ -195,34 +189,119 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) return 1; } +static const char *const int_name[] = { + [INT_MEM_ERROR] = "Memory error", + [INT_ILL] = "Illegal instruction", + [INT_GPV] = "General protection violation", + [INT_UDN_ACCESS] = "UDN access", + [INT_IDN_ACCESS] = "IDN access", +#if CHIP_HAS_SN() + [INT_SN_ACCESS] = "SN access", +#endif + [INT_SWINT_3] = "Software interrupt 3", + [INT_SWINT_2] = "Software interrupt 2", + [INT_SWINT_0] = "Software interrupt 0", + [INT_UNALIGN_DATA] = "Unaligned data", + [INT_DOUBLE_FAULT] = "Double fault", +#ifdef __tilegx__ + [INT_ILL_TRANS] = "Illegal virtual address", +#endif +}; + +static int do_bpt(struct pt_regs *regs) +{ + unsigned long bundle, bcode, bpt; + + bundle = *(unsigned long *)instruction_pointer(regs); + + /* + * bpt should be { bpt; nop }, which is 0x286a44ae51485000ULL. + * we encode the unused least significant bits for other purposes.
+ */ + bpt = bundle & ~((1ULL << 12) - 1); + if (bpt != TILE_BPT_BUNDLE) + return 0; + + bcode = bundle & ((1ULL << 12) - 1); + /* + * notify the kprobe handlers, if instruction is likely to + * pertain to them. + */ + switch (bcode) { + /* breakpoint_insn */ + case 0: + notify_die(DIE_BREAK, "debug", regs, bundle, + INT_ILL, SIGTRAP); + break; + /* compiled_bpt */ + case DIE_COMPILED_BPT: + notify_die(DIE_COMPILED_BPT, "debug", regs, bundle, + INT_ILL, SIGTRAP); + break; + /* breakpoint2_insn */ + case DIE_SSTEPBP: + notify_die(DIE_SSTEPBP, "single_step", regs, bundle, + INT_ILL, SIGTRAP); + break; + default: + return 0; + } + + return 1; +} + void __kprobes do_trap(struct pt_regs *regs, int fault_num, unsigned long reason) { siginfo_t info = { 0 }; int signo, code; - unsigned long address; - bundle_bits instr; + unsigned long address = 0; + tile_bundle_bits instr; + int is_kernel = !user_mode(regs); + + /* Handle breakpoints, etc. */ + if (is_kernel && fault_num == INT_ILL && do_bpt(regs)) + return; - /* Re-enable interrupts. */ - local_irq_enable(); + /* Re-enable interrupts, if they were previously enabled. */ + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); /* * If it hits in kernel mode and we can't fix it up, just exit the * current process and hope for the best. 
*/ - if (!user_mode(regs)) { - if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ + if (is_kernel) { + const char *name; + char buf[100]; + if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */ return; - pr_alert("Kernel took bad trap %d at PC %#lx\n", - fault_num, regs->pc); + if (fault_num >= 0 && + fault_num < sizeof(int_name)/sizeof(int_name[0]) && + int_name[fault_num] != NULL) + name = int_name[fault_num]; + else + name = "Unknown interrupt"; if (fault_num == INT_GPV) - pr_alert("GPV_REASON is %#lx\n", reason); + snprintf(buf, sizeof(buf), "; GPV_REASON %#lx", reason); +#ifdef __tilegx__ + else if (fault_num == INT_ILL_TRANS) + snprintf(buf, sizeof(buf), "; address %#lx", reason); +#endif + else + buf[0] = '\0'; + pr_alert("Kernel took bad trap %d (%s) at PC %#lx%s\n", + fault_num, name, regs->pc, buf); show_regs(regs); do_exit(SIGKILL); /* FIXME: implement i386 die() */ return; } switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { @@ -289,14 +368,15 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; #ifdef __tilegx__ - case INT_ILL_TRANS: + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. 
*/ + fill_ra_stack(); + signo = SIGSEGV; + address = reason; code = SEGV_MAPERR; - if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) - address = regs->pc; - else - address = 0; /* FIXME: GX: single-step for address */ break; + } #endif default: panic("Unexpected do_trap interrupt number %d", fault_num); @@ -308,6 +388,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c new file mode 100644 index 00000000000..c02ea2a45f6 --- /dev/null +++ b/arch/tile/kernel/unaligned.c @@ -0,0 +1,1598 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * A code-rewriter that handles unaligned exception. + */ + +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/thread_info.h> +#include <linux/uaccess.h> +#include <linux/mman.h> +#include <linux/types.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/compat.h> +#include <linux/prctl.h> +#include <asm/cacheflush.h> +#include <asm/traps.h> +#include <asm/uaccess.h> +#include <asm/unaligned.h> +#include <arch/abi.h> +#include <arch/spr_def.h> +#include <arch/opcode.h> + + +/* + * This file handles unaligned exception for tile-Gx. 
The tilepro's unaligned + * exception is supported out of single_step.c + */ + +int unaligned_printk; + +static int __init setup_unaligned_printk(char *str) +{ + long val; + if (kstrtol(str, 0, &val) != 0) + return 0; + unaligned_printk = val; + pr_info("Printk for each unaligned data accesses is %s\n", + unaligned_printk ? "enabled" : "disabled"); + return 1; +} +__setup("unaligned_printk=", setup_unaligned_printk); + +unsigned int unaligned_fixup_count; + +#ifdef __tilegx__ + +/* + * Unalign data jit fixup code fragement. Reserved space is 128 bytes. + * The 1st 64-bit word saves fault PC address, 2nd word is the fault + * instruction bundle followed by 14 JIT bundles. + */ + +struct unaligned_jit_fragment { + unsigned long pc; + tilegx_bundle_bits bundle; + tilegx_bundle_bits insn[14]; +}; + +/* + * Check if a nop or fnop at bundle's pipeline X0. + */ + +static bool is_bundle_x0_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_X0(bundle) == + NOP_UNARY_OPCODE_X0) && + (get_RRROpcodeExtension_X0(bundle) == + UNARY_RRR_0_OPCODE_X0) && + (get_Opcode_X0(bundle) == + RRR_0_OPCODE_X0)) || + ((get_UnaryOpcodeExtension_X0(bundle) == + FNOP_UNARY_OPCODE_X0) && + (get_RRROpcodeExtension_X0(bundle) == + UNARY_RRR_0_OPCODE_X0) && + (get_Opcode_X0(bundle) == + RRR_0_OPCODE_X0))); +} + +/* + * Check if nop or fnop at bundle's pipeline X1. + */ + +static bool is_bundle_x1_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_X1(bundle) == + NOP_UNARY_OPCODE_X1) && + (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) && + (get_Opcode_X1(bundle) == + RRR_0_OPCODE_X1)) || + ((get_UnaryOpcodeExtension_X1(bundle) == + FNOP_UNARY_OPCODE_X1) && + (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) && + (get_Opcode_X1(bundle) == + RRR_0_OPCODE_X1))); +} + +/* + * Check if nop or fnop at bundle's Y0 pipeline. 
+ */ + +static bool is_bundle_y0_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_Y0(bundle) == + NOP_UNARY_OPCODE_Y0) && + (get_RRROpcodeExtension_Y0(bundle) == + UNARY_RRR_1_OPCODE_Y0) && + (get_Opcode_Y0(bundle) == + RRR_1_OPCODE_Y0)) || + ((get_UnaryOpcodeExtension_Y0(bundle) == + FNOP_UNARY_OPCODE_Y0) && + (get_RRROpcodeExtension_Y0(bundle) == + UNARY_RRR_1_OPCODE_Y0) && + (get_Opcode_Y0(bundle) == + RRR_1_OPCODE_Y0))); +} + +/* + * Check if nop or fnop at bundle's pipeline Y1. + */ + +static bool is_bundle_y1_nop(tilegx_bundle_bits bundle) +{ + return (((get_UnaryOpcodeExtension_Y1(bundle) == + NOP_UNARY_OPCODE_Y1) && + (get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) && + (get_Opcode_Y1(bundle) == + RRR_1_OPCODE_Y1)) || + ((get_UnaryOpcodeExtension_Y1(bundle) == + FNOP_UNARY_OPCODE_Y1) && + (get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) && + (get_Opcode_Y1(bundle) == + RRR_1_OPCODE_Y1))); +} + +/* + * Test if a bundle's y0 and y1 pipelines are both nop or fnop. + */ + +static bool is_y0_y1_nop(tilegx_bundle_bits bundle) +{ + return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle); +} + +/* + * Test if a bundle's x0 and x1 pipelines are both nop or fnop. + */ + +static bool is_x0_x1_nop(tilegx_bundle_bits bundle) +{ + return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle); +} + +/* + * Find the destination, source registers of fault unalign access instruction + * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and + * clob3, which are guaranteed different from any register used in the fault + * bundle. r_alias is used to return if the other instructions other than the + * unalign load/store shares same register with ra, rb and rd. 
+ */ + +static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra, + uint64_t *rb, uint64_t *clob1, uint64_t *clob2, + uint64_t *clob3, bool *r_alias) +{ + int i; + uint64_t reg; + uint64_t reg_map = 0, alias_reg_map = 0, map; + bool alias = false; + + /* + * Parse fault bundle, find potential used registers and mark + * corresponding bits in reg_map and alias_map. These 2 bit maps + * are used to find the scratch registers and determine if there + * is register alais. + */ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */ + + reg = get_SrcA_Y2(bundle); + reg_map |= 1ULL << reg; + *ra = reg; + reg = get_SrcBDest_Y2(bundle); + reg_map |= 1ULL << reg; + + if (rd) { + /* Load. */ + *rd = reg; + alias_reg_map = (1ULL << *rd) | (1ULL << *ra); + } else { + /* Store. */ + *rb = reg; + alias_reg_map = (1ULL << *ra) | (1ULL << *rb); + } + + if (!is_bundle_y1_nop(bundle)) { + reg = get_SrcA_Y1(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_Y1(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_Y1(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + + if (!is_bundle_y0_nop(bundle)) { + reg = get_SrcA_Y0(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_Y0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_Y0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + } else { /* X Mode Bundle. */ + + reg = get_SrcA_X1(bundle); + reg_map |= (1ULL << reg); + *ra = reg; + if (rd) { + /* Load. */ + reg = get_Dest_X1(bundle); + reg_map |= (1ULL << reg); + *rd = reg; + alias_reg_map = (1ULL << *rd) | (1ULL << *ra); + } else { + /* Store. 
*/ + reg = get_SrcB_X1(bundle); + reg_map |= (1ULL << reg); + *rb = reg; + alias_reg_map = (1ULL << *ra) | (1ULL << *rb); + } + + if (!is_bundle_x0_nop(bundle)) { + reg = get_SrcA_X0(bundle); + reg_map |= (1ULL << reg); + map = (1ULL << reg); + + reg = get_SrcB_X0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + reg = get_Dest_X0(bundle); + reg_map |= (1ULL << reg); + map |= (1ULL << reg); + + if (map & alias_reg_map) + alias = true; + } + } + + /* + * "alias" indicates if the unalign access registers have collision + * with others in the same bundle. We jsut simply test all register + * operands case (RRR), ignored the case with immidate. If a bundle + * has no register alias, we may do fixup in a simple or fast manner. + * So if an immidata field happens to hit with a register, we may end + * up fall back to the generic handling. + */ + + *r_alias = alias; + + /* Flip bits on reg_map. */ + reg_map ^= -1ULL; + + /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */ + for (i = 0; i < TREG_SP; i++) { + if (reg_map & (0x1ULL << i)) { + if (*clob1 == -1) { + *clob1 = i; + } else if (*clob2 == -1) { + *clob2 = i; + } else if (*clob3 == -1) { + *clob3 = i; + return; + } + } + } +} + +/* + * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them + * is unexpected. 
+ */ + +static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb, + uint64_t clob1, uint64_t clob2, uint64_t clob3) +{ + bool unexpected = false; + if ((ra >= 56) && (ra != TREG_ZERO)) + unexpected = true; + + if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56)) + unexpected = true; + + if (rd != -1) { + if ((rd >= 56) && (rd != TREG_ZERO)) + unexpected = true; + } else { + if ((rb >= 56) && (rb != TREG_ZERO)) + unexpected = true; + } + return unexpected; +} + + +#define GX_INSN_X0_MASK ((1ULL << 31) - 1) +#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31) +#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL)) +#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31) +#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20)) + +#ifdef __LITTLE_ENDIAN +#define GX_INSN_BSWAP(_bundle_) (_bundle_) +#else +#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_) +#endif /* __LITTLE_ENDIAN */ + +/* + * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section. + * The corresponding static function jix_x#_###(.) generates partial or + * whole bundle based on the template and given arguments. 
+ */ + +#define __JIT_CODE(_X_) \ + asm (".pushsection .rodata.unalign_data, \"a\"\n" \ + _X_"\n" \ + ".popsection\n") + +__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}"); +static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg) +{ + extern tilegx_bundle_bits __unalign_jit_x1_mtspr; + return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) | + create_MT_Imm14_X1(spr) | create_SrcA_X1(reg); +} + +__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}"); +static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr) +{ + extern tilegx_bundle_bits __unalign_jit_x1_mfspr; + return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) | + create_MF_Imm14_X1(spr) | create_Dest_X1(reg); +} + +__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x0_addi; + return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_Imm8_X0(imm8); +} + +__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}"); +static tilegx_bundle_bits jit_x1_ldna(int rd, int ra) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ldna; + return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra); +} + +__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}"); +static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb) +{ + extern tilegx_bundle_bits __unalign_jit_x0_dblalign; + return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_SrcB_X0(rb); +} + +__JIT_CODE("__unalign_jit_x1_iret: {iret}"); +static tilegx_bundle_bits jit_x1_iret(void) +{ + extern tilegx_bundle_bits __unalign_jit_x1_iret; + return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK; +} + +__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}"); +static tilegx_bundle_bits jit_x0_fnop(void) +{ + extern tilegx_bundle_bits 
__unalign_jit_x01_fnop; + return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK; +} + +static tilegx_bundle_bits jit_x1_fnop(void) +{ + extern tilegx_bundle_bits __unalign_jit_x01_fnop; + return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK; +} + +__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}"); +static tilegx_bundle_bits jit_y2_dummy(void) +{ + extern tilegx_bundle_bits __unalign_jit_y2_dummy; + return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK; +} + +static tilegx_bundle_bits jit_y1_fnop(void) +{ + extern tilegx_bundle_bits __unalign_jit_y2_dummy; + return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK; +} + +__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st1_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) & + (~create_SrcA_X1(-1)) & + GX_INSN_X1_MASK) | create_SrcA_X1(ra) | + create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}"); +static tilegx_bundle_bits jit_x1_st(int ra, int rb) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st; + return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) | + create_SrcA_X1(ra) | create_SrcB_X1(rb); +} + +__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_st_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) & + (~create_SrcA_X1(-1)) & + GX_INSN_X1_MASK) | create_SrcA_X1(ra) | + create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}"); +static tilegx_bundle_bits jit_x1_ld(int rd, int ra) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ld; + return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra); +} + 
+__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}"); +static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_ld_add; + return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) & + (~create_Dest_X1(-1)) & + GX_INSN_X1_MASK) | create_Dest_X1(rd) | + create_SrcA_X1(ra) | create_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}"); +static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe) +{ + extern tilegx_bundle_bits __unalign_jit_x0_bfexts; + return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_BFStart_X0(bfs) | create_BFEnd_X0(bfe); +} + +__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}"); +static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe) +{ + extern tilegx_bundle_bits __unalign_jit_x0_bfextu; + return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_BFStart_X0(bfs) | create_BFEnd_X0(bfe); +} + +__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}"); +static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8) +{ + extern tilegx_bundle_bits __unalign_jit_x1_addi; + return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) | + create_Dest_X1(rd) | create_SrcA_X1(ra) | + create_Imm8_X1(imm8); +} + +__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6) +{ + extern tilegx_bundle_bits __unalign_jit_x0_shrui; + return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) & + GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_ShAmt_X0(imm6); +} + +__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}"); +static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6) +{ + extern tilegx_bundle_bits __unalign_jit_x0_rotli; + return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) & + 
GX_INSN_X0_MASK) | + create_Dest_X0(rd) | create_SrcA_X0(ra) | + create_ShAmt_X0(imm6); +} + +__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}"); +static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff) +{ + extern tilegx_bundle_bits __unalign_jit_x1_bnezt; + return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) & + GX_INSN_X1_MASK) | + create_SrcA_X1(ra) | create_BrOff_X1(broff); +} + +#undef __JIT_CODE + +/* + * This function generates unalign fixup JIT. + * + * We first find unalign load/store instruction's destination, source + * registers: ra, rb and rd. and 3 scratch registers by calling + * find_regs(...). 3 scratch clobbers should not alias with any register + * used in the fault bundle. Then analyze the fault bundle to determine + * if it's a load or store, operand width, branch or address increment etc. + * At last generated JIT is copied into JIT code area in user space. + */ + +static +void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, + int align_ctl) +{ + struct thread_info *info = current_thread_info(); + struct unaligned_jit_fragment frag; + struct unaligned_jit_fragment *jit_code_area; + tilegx_bundle_bits bundle_2 = 0; + /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */ + bool bundle_2_enable = true; + uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1; + /* + * Indicate if the unalign access + * instruction's registers hit with + * others in the same bundle. + */ + bool alias = false; + bool load_n_store = true; + bool load_store_signed = false; + unsigned int load_store_size = 8; + bool y1_br = false; /* True, for a branch in same bundle at Y1.*/ + int y1_br_reg = 0; + /* True for link operation. i.e. 
jalr or lnk at Y1 */ + bool y1_lr = false; + int y1_lr_reg = 0; + bool x1_add = false;/* True, for load/store ADD instruction at X1*/ + int x1_add_imm8 = 0; + bool unexpected = false; + int n = 0, k; + + jit_code_area = + (struct unaligned_jit_fragment *)(info->unalign_jit_base); + + memset((void *)&frag, 0, sizeof(frag)); + + /* 0: X mode, Otherwise: Y mode. */ + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + unsigned int mod, opcode; + + if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 && + get_RRROpcodeExtension_Y1(bundle) == + UNARY_RRR_1_OPCODE_Y1) { + + opcode = get_UnaryOpcodeExtension_Y1(bundle); + + /* + * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1 + * pipeline. + */ + switch (opcode) { + case JALR_UNARY_OPCODE_Y1: + case JALRP_UNARY_OPCODE_Y1: + y1_lr = true; + y1_lr_reg = 55; /* Link register. */ + /* FALLTHROUGH */ + case JR_UNARY_OPCODE_Y1: + case JRP_UNARY_OPCODE_Y1: + y1_br = true; + y1_br_reg = get_SrcA_Y1(bundle); + break; + case LNK_UNARY_OPCODE_Y1: + /* "lnk" at Y1 pipeline. */ + y1_lr = true; + y1_lr_reg = get_Dest_Y1(bundle); + break; + } + } + + opcode = get_Opcode_Y2(bundle); + mod = get_Mode(bundle); + + /* + * bundle_2 is bundle after making Y2 as a dummy operation + * - ld zero, sp + */ + bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy(); + + /* Make Y1 as fnop if Y1 is a branch or lnk operation. */ + if (y1_br || y1_lr) { + bundle_2 &= ~(GX_INSN_Y1_MASK); + bundle_2 |= jit_y1_fnop(); + } + + if (is_y0_y1_nop(bundle_2)) + bundle_2_enable = false; + + if (mod == MODE_OPCODE_YC2) { + /* Store. */ + load_n_store = false; + load_store_size = 1 << opcode; + load_store_signed = false; + find_regs(bundle, 0, &ra, &rb, &clob1, &clob2, + &clob3, &alias); + if (load_store_size > 8) + unexpected = true; + } else { + /* Load. 
*/ + load_n_store = true; + if (mod == MODE_OPCODE_YB2) { + switch (opcode) { + case LD_OPCODE_Y2: + load_store_signed = false; + load_store_size = 8; + break; + case LD4S_OPCODE_Y2: + load_store_signed = true; + load_store_size = 4; + break; + case LD4U_OPCODE_Y2: + load_store_signed = false; + load_store_size = 4; + break; + default: + unexpected = true; + } + } else if (mod == MODE_OPCODE_YA2) { + if (opcode == LD2S_OPCODE_Y2) { + load_store_signed = true; + load_store_size = 2; + } else if (opcode == LD2U_OPCODE_Y2) { + load_store_signed = false; + load_store_size = 2; + } else + unexpected = true; + } else + unexpected = true; + find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2, + &clob3, &alias); + } + } else { + unsigned int opcode; + + /* bundle_2 is bundle after making X1 as "fnop". */ + bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop(); + + if (is_x0_x1_nop(bundle_2)) + bundle_2_enable = false; + + if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) { + opcode = get_UnaryOpcodeExtension_X1(bundle); + + if (get_RRROpcodeExtension_X1(bundle) == + UNARY_RRR_0_OPCODE_X1) { + load_n_store = true; + find_regs(bundle, &rd, &ra, &rb, &clob1, + &clob2, &clob3, &alias); + + switch (opcode) { + case LD_UNARY_OPCODE_X1: + load_store_signed = false; + load_store_size = 8; + break; + case LD4S_UNARY_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD4U_UNARY_OPCODE_X1: + load_store_size = 4; + break; + + case LD2S_UNARY_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD2U_UNARY_OPCODE_X1: + load_store_size = 2; + break; + default: + unexpected = true; + } + } else { + load_n_store = false; + load_store_signed = false; + find_regs(bundle, 0, &ra, &rb, + &clob1, &clob2, &clob3, + &alias); + + opcode = get_RRROpcodeExtension_X1(bundle); + switch (opcode) { + case ST_RRR_0_OPCODE_X1: + load_store_size = 8; + break; + case ST4_RRR_0_OPCODE_X1: + load_store_size = 4; + break; + case ST2_RRR_0_OPCODE_X1: + load_store_size = 2; + break; + 
default: + unexpected = true; + } + } + } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) { + load_n_store = true; + opcode = get_Imm8OpcodeExtension_X1(bundle); + switch (opcode) { + case LD_ADD_IMM8_OPCODE_X1: + load_store_size = 8; + break; + + case LD4S_ADD_IMM8_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD4U_ADD_IMM8_OPCODE_X1: + load_store_size = 4; + break; + + case LD2S_ADD_IMM8_OPCODE_X1: + load_store_signed = true; + /* FALLTHROUGH */ + case LD2U_ADD_IMM8_OPCODE_X1: + load_store_size = 2; + break; + + case ST_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 8; + break; + case ST4_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 4; + break; + case ST2_ADD_IMM8_OPCODE_X1: + load_n_store = false; + load_store_size = 2; + break; + default: + unexpected = true; + } + + if (!unexpected) { + x1_add = true; + if (load_n_store) + x1_add_imm8 = get_Imm8_X1(bundle); + else + x1_add_imm8 = get_Dest_Imm8_X1(bundle); + } + + find_regs(bundle, load_n_store ? (&rd) : NULL, + &ra, &rb, &clob1, &clob2, &clob3, &alias); + } else + unexpected = true; + } + + /* + * Some sanity check for register numbers extracted from fault bundle. + */ + if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true) + unexpected = true; + + /* Give warning if register ra has an aligned address. */ + if (!unexpected) + WARN_ON(!((load_store_size - 1) & (regs->regs[ra]))); + + + /* + * Fault came from kernel space, here we only need take care of + * unaligned "get_user/put_user" macros defined in "uaccess.h". + * Basically, we will handle bundle like this: + * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0} + * (Refer to file "arch/tile/include/asm/uaccess.h" for details). + * For either load or store, byte-wise operation is performed by calling + * get_user() or put_user(). If the macro returns non-zero value, + * set the value to rx, otherwise set zero to rx. Finally make pc point + * to next bundle and return. 
+ */ + + if (EX1_PL(regs->ex1) != USER_PL) { + + unsigned long rx = 0; + unsigned long x = 0, ret = 0; + + if (y1_br || y1_lr || x1_add || + (load_store_signed != + (load_n_store && load_store_size == 4))) { + /* No branch, link, wrong sign-ext or load/store add. */ + unexpected = true; + } else if (!unexpected) { + if (bundle & TILEGX_BUNDLE_MODE_MASK) { + /* + * Fault bundle is Y mode. + * Check if the Y1 and Y0 is the form of + * { movei rx, 0; nop/fnop }, if yes, + * find the rx. + */ + + if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1) + && (get_SrcA_Y1(bundle) == TREG_ZERO) && + (get_Imm8_Y1(bundle) == 0) && + is_bundle_y0_nop(bundle)) { + rx = get_Dest_Y1(bundle); + } else if ((get_Opcode_Y0(bundle) == + ADDI_OPCODE_Y0) && + (get_SrcA_Y0(bundle) == TREG_ZERO) && + (get_Imm8_Y0(bundle) == 0) && + is_bundle_y1_nop(bundle)) { + rx = get_Dest_Y0(bundle); + } else { + unexpected = true; + } + } else { + /* + * Fault bundle is X mode. + * Check if the X0 is 'movei rx, 0', + * if yes, find the rx. + */ + + if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0) + && (get_Imm8OpcodeExtension_X0(bundle) == + ADDI_IMM8_OPCODE_X0) && + (get_SrcA_X0(bundle) == TREG_ZERO) && + (get_Imm8_X0(bundle) == 0)) { + rx = get_Dest_X0(bundle); + } else { + unexpected = true; + } + } + + /* rx should be less than 56. */ + if (!unexpected && (rx >= 56)) + unexpected = true; + } + + if (!search_exception_tables(regs->pc)) { + /* No fixup in the exception tables for the pc. */ + unexpected = true; + } + + if (unexpected) { + /* Unexpected unalign kernel fault. */ + struct task_struct *tsk = validate_current(); + + bust_spinlocks(1); + + show_regs(regs); + + if (unlikely(tsk->pid < 2)) { + panic("Kernel unalign fault running %s!", + tsk->pid ? 
"init" : "the idle task"); + } +#ifdef SUPPORT_DIE + die("Oops", regs); +#endif + bust_spinlocks(1); + + do_group_exit(SIGKILL); + + } else { + unsigned long i, b = 0; + unsigned char *ptr = + (unsigned char *)regs->regs[ra]; + if (load_n_store) { + /* handle get_user(x, ptr) */ + for (i = 0; i < load_store_size; i++) { + ret = get_user(b, ptr++); + if (!ret) { + /* Success! update x. */ +#ifdef __LITTLE_ENDIAN + x |= (b << (8 * i)); +#else + x <<= 8; + x |= b; +#endif /* __LITTLE_ENDIAN */ + } else { + x = 0; + break; + } + } + + /* Sign-extend 4-byte loads. */ + if (load_store_size == 4) + x = (long)(int)x; + + /* Set register rd. */ + regs->regs[rd] = x; + + /* Set register rx. */ + regs->regs[rx] = ret; + + /* Bump pc. */ + regs->pc += 8; + + } else { + /* Handle put_user(x, ptr) */ + x = regs->regs[rb]; +#ifdef __LITTLE_ENDIAN + b = x; +#else + /* + * Swap x in order to store x from low + * to high memory same as the + * little-endian case. + */ + switch (load_store_size) { + case 8: + b = swab64(x); + break; + case 4: + b = swab32(x); + break; + case 2: + b = swab16(x); + break; + } +#endif /* __LITTLE_ENDIAN */ + for (i = 0; i < load_store_size; i++) { + ret = put_user(b, ptr++); + if (ret) + break; + /* Success! shift 1 byte. */ + b >>= 8; + } + /* Set register rx. */ + regs->regs[rx] = ret; + + /* Bump pc. */ + regs->pc += 8; + } + } + + unaligned_fixup_count++; + + if (unaligned_printk) { + pr_info("%s/%d. Unalign fixup for kernel access " + "to userspace %lx.", + current->comm, current->pid, regs->regs[ra]); + } + + /* Done! Return to the exception handler. */ + return; + } + + if ((align_ctl == 0) || unexpected) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = (unsigned char __user *)0 + }; + if (unaligned_printk) + pr_info("Unalign bundle: unexp @%llx, %llx", + (unsigned long long)regs->pc, + (unsigned long long)bundle); + + if (ra < 56) { + unsigned long uaa = (unsigned long)regs->regs[ra]; + /* Set bus Address. 
*/ + info.si_addr = (unsigned char __user *)uaa; + } + + unaligned_fixup_count++; + + trace_unhandled_signal("unaligned fixup trap", regs, + (unsigned long)info.si_addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return; + } + +#ifdef __LITTLE_ENDIAN +#define UA_FIXUP_ADDR_DELTA 1 +#define UA_FIXUP_BFEXT_START(_B_) 0 +#define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1) +#else /* __BIG_ENDIAN */ +#define UA_FIXUP_ADDR_DELTA -1 +#define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_)) +#define UA_FIXUP_BFEXT_END(_B_) 63 +#endif /* __LITTLE_ENDIAN */ + + + + if ((ra != rb) && (rd != TREG_SP) && !alias && + !y1_br && !y1_lr && !x1_add) { + /* + * Simple case: ra != rb and no register alias found, + * and no branch or link. This will be the majority. + * We can do a little better for simplae case than the + * generic scheme below. + */ + if (!load_n_store) { + /* + * Simple store: ra != rb, no need for scratch register. + * Just store and rotate to right bytewise. + */ +#ifdef __BIG_ENDIAN + frag.insn[n++] = + jit_x0_addi(ra, ra, load_store_size - 1) | + jit_x1_fnop(); +#endif /* __BIG_ENDIAN */ + for (k = 0; k < load_store_size; k++) { + /* Store a byte. */ + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(ra, rb, + UA_FIXUP_ADDR_DELTA); + } +#ifdef __BIG_ENDIAN + frag.insn[n] = jit_x1_addi(ra, ra, 1); +#else + frag.insn[n] = jit_x1_addi(ra, ra, + -1 * load_store_size); +#endif /* __LITTLE_ENDIAN */ + + if (load_store_size == 8) { + frag.insn[n] |= jit_x0_fnop(); + } else if (load_store_size == 4) { + frag.insn[n] |= jit_x0_rotli(rb, rb, 32); + } else { /* = 2 */ + frag.insn[n] |= jit_x0_rotli(rb, rb, 16); + } + n++; + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + } else { + if (rd == ra) { + /* Use two clobber registers: clob1/2. 
*/ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -16) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob1, ra, 7) | + jit_x1_st_add(TREG_SP, clob1, -8); + frag.insn[n++] = + jit_x0_addi(clob2, ra, 0) | + jit_x1_st(TREG_SP, clob2); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(rd, ra); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + /* + * Note: we must make sure that rd must not + * be sp. Recover clob1/2 from stack. + */ + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, clob2) | + jit_x1_ld_add(clob2, TREG_SP, 8); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob1, TREG_SP, 16); + } else { + /* Use one clobber register: clob1 only. */ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -16) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob1, ra, 7) | + jit_x1_st(TREG_SP, clob1); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(rd, ra); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + /* + * Note: we must make sure that rd must not + * be sp. Recover clob1 from stack. + */ + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, ra) | + jit_x1_ld_add(clob1, TREG_SP, 16); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + /* + * For non 8-byte load, extract corresponding bytes and + * signed extension. 
+ */ + if (load_store_size == 4) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + } else if (load_store_size == 2) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + } + + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_iret(); + } + } else if (!load_n_store) { + + /* + * Generic memory store cases: use 3 clobber registers. + * + * Alloc space for saveing clob2,1,3 on user's stack. + * register clob3 points to where clob2 saved, followed by + * clob1 and 3 from high to low memory. + */ + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -32) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob3, TREG_SP, 16) | + jit_x1_st_add(TREG_SP, clob3, 8); +#ifdef __LITTLE_ENDIAN + frag.insn[n++] = + jit_x0_addi(clob1, ra, 0) | + jit_x1_st_add(TREG_SP, clob1, 8); +#else + frag.insn[n++] = + jit_x0_addi(clob1, ra, load_store_size - 1) | + jit_x1_st_add(TREG_SP, clob1, 8); +#endif + if (load_store_size == 8) { + /* + * We save one byte a time, not for fast, but compact + * code. After each store, data source register shift + * right one byte. unchanged after 8 stores. 
+ */ + frag.insn[n++] = + jit_x0_addi(clob2, TREG_ZERO, 7) | + jit_x1_st_add(TREG_SP, clob2, 16); + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA); + frag.insn[n++] = + jit_x0_addi(clob2, clob2, -1) | + jit_x1_bnezt(clob2, -1); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_addi(clob2, y1_br_reg, 0); + } else if (load_store_size == 4) { + frag.insn[n++] = + jit_x0_addi(clob2, TREG_ZERO, 3) | + jit_x1_st_add(TREG_SP, clob2, 16); + frag.insn[n++] = + jit_x0_rotli(rb, rb, 56) | + jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA); + frag.insn[n++] = + jit_x0_addi(clob2, clob2, -1) | + jit_x1_bnezt(clob2, -1); + /* + * same as 8-byte case, but need shift another 4 + * byte to recover rb for 4-byte store. + */ + frag.insn[n++] = jit_x0_rotli(rb, rb, 32) | + jit_x1_addi(clob2, y1_br_reg, 0); + } else { /* =2 */ + frag.insn[n++] = + jit_x0_addi(clob2, rb, 0) | + jit_x1_st_add(TREG_SP, clob2, 16); + for (k = 0; k < 2; k++) { + frag.insn[n++] = + jit_x0_shrui(rb, rb, 8) | + jit_x1_st1_add(clob1, rb, + UA_FIXUP_ADDR_DELTA); + } + frag.insn[n++] = + jit_x0_addi(rb, clob2, 0) | + jit_x1_addi(clob2, y1_br_reg, 0); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + + if (y1_lr) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mfspr(y1_lr_reg, + SPR_EX_CONTEXT_0_0); + } + if (y1_br) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mtspr(SPR_EX_CONTEXT_0_0, + clob2); + } + if (x1_add) { + frag.insn[n++] = + jit_x0_addi(ra, ra, x1_add_imm8) | + jit_x1_ld_add(clob2, clob3, -8); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob2, clob3, -8); + } + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob1, clob3, -8); + frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3); + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + + } else { + /* + * Generic memory load cases. + * + * Alloc space for saveing clob1,2,3 on user's stack. 
+ * register clob3 points to where clob1 saved, followed + * by clob2 and 3 from high to low memory. + */ + + frag.insn[n++] = + jit_x0_addi(TREG_SP, TREG_SP, -32) | + jit_x1_fnop(); + frag.insn[n++] = + jit_x0_addi(clob3, TREG_SP, 16) | + jit_x1_st_add(TREG_SP, clob3, 8); + frag.insn[n++] = + jit_x0_addi(clob2, ra, 0) | + jit_x1_st_add(TREG_SP, clob2, 8); + + if (y1_br) { + frag.insn[n++] = + jit_x0_addi(clob1, y1_br_reg, 0) | + jit_x1_st_add(TREG_SP, clob1, 16); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_st_add(TREG_SP, clob1, 16); + } + + if (bundle_2_enable) + frag.insn[n++] = bundle_2; + + if (y1_lr) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mfspr(y1_lr_reg, + SPR_EX_CONTEXT_0_0); + } + + if (y1_br) { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_mtspr(SPR_EX_CONTEXT_0_0, + clob1); + } + + frag.insn[n++] = + jit_x0_addi(clob1, clob2, 7) | + jit_x1_ldna(rd, clob2); + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ldna(clob1, clob1); + frag.insn[n++] = + jit_x0_dblalign(rd, clob1, clob2) | + jit_x1_ld_add(clob1, clob3, -8); + if (x1_add) { + frag.insn[n++] = + jit_x0_addi(ra, ra, x1_add_imm8) | + jit_x1_ld_add(clob2, clob3, -8); + } else { + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld_add(clob2, clob3, -8); + } + + frag.insn[n++] = + jit_x0_fnop() | + jit_x1_ld(clob3, clob3); + + if (load_store_size == 4) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(4), + UA_FIXUP_BFEXT_END(4)) | + jit_x1_fnop(); + } else if (load_store_size == 2) { + if (load_store_signed) + frag.insn[n++] = + jit_x0_bfexts( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + else + frag.insn[n++] = + jit_x0_bfextu( + rd, rd, + UA_FIXUP_BFEXT_START(2), + UA_FIXUP_BFEXT_END(2)) | + jit_x1_fnop(); + } + + frag.insn[n++] = jit_x0_fnop() | jit_x1_iret(); + } + + /* Max JIT 
bundle count is 14. */ + WARN_ON(n > 14); + + if (!unexpected) { + int status = 0; + int idx = (regs->pc >> 3) & + ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1); + + frag.pc = regs->pc; + frag.bundle = bundle; + + if (unaligned_printk) { + pr_info("%s/%d, Unalign fixup: pc=%lx " + "bundle=%lx %d %d %d %d %d %d %d %d.", + current->comm, current->pid, + (unsigned long)frag.pc, + (unsigned long)frag.bundle, + (int)alias, (int)rd, (int)ra, + (int)rb, (int)bundle_2_enable, + (int)y1_lr, (int)y1_br, (int)x1_add); + + for (k = 0; k < n; k += 2) + pr_info("[%d] %016llx %016llx", k, + (unsigned long long)frag.insn[k], + (unsigned long long)frag.insn[k+1]); + } + + /* Swap bundle byte order for big endian sys. */ +#ifdef __BIG_ENDIAN + frag.bundle = GX_INSN_BSWAP(frag.bundle); + for (k = 0; k < n; k++) + frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]); +#endif /* __BIG_ENDIAN */ + + status = copy_to_user((void __user *)&jit_code_area[idx], + &frag, sizeof(frag)); + if (status) { + /* Fail to copy JIT into user land. send SIGSEGV. */ + siginfo_t info = { + .si_signo = SIGSEGV, + .si_code = SEGV_MAPERR, + .si_addr = (void __user *)&jit_code_area[idx] + }; + + pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx", + current->pid, current->comm, + (unsigned long long)&jit_code_area[idx]); + + trace_unhandled_signal("segfault in unalign fixup", + regs, + (unsigned long)info.si_addr, + SIGSEGV); + force_sig_info(info.si_signo, &info, current); + return; + } + + + /* Do a cheaper increment, not accurate. */ + unaligned_fixup_count++; + __flush_icache_range((unsigned long)&jit_code_area[idx], + (unsigned long)&jit_code_area[idx] + + sizeof(frag)); + + /* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/ + __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8); + __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0)); + + /* Modify pc at the start of new JIT. */ + regs->pc = (unsigned long)&jit_code_area[idx].insn[0]; + /* Set ICS in SPR_EX_CONTEXT_K_1. 
*/
+		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
+	}
+}
+
+
+/*
+ * C function to generate unalign data JIT. Called from unalign data
+ * interrupt handler.
+ *
+ * First check if unalign fix is disabled or exception did not come from
+ * user space or sp register points to unalign address, if true, generate a
+ * SIGBUS. Then map a page into user space as JIT area if it is not mapped
+ * yet. Generate JIT code by calling jit_bundle_gen(). After that return
+ * back to exception handler.
+ *
+ * The exception handler will "iret" to new generated JIT code after
+ * restoring caller saved registers. In theory, the JIT code will perform
+ * another "iret" to resume user's program.
+ *
+ * @regs:   register state saved at the unaligned-access exception.
+ * @vecnum: interrupt vector number; not referenced by this function.
+ */
+
+void do_unaligned(struct pt_regs *regs, int vecnum)
+{
+	tilegx_bundle_bits __user  *pc;
+	tilegx_bundle_bits bundle;
+	/* Named "ti" so the local siginfo_t "info" objects below don't shadow it. */
+	struct thread_info *ti = current_thread_info();
+	int align_ctl;
+
+	/* Checks the per-process unaligned JIT flags */
+	align_ctl = unaligned_fixup;
+	switch (task_thread_info(current)->align_ctl) {
+	case PR_UNALIGN_NOPRINT:
+		align_ctl = 1;
+		break;
+	case PR_UNALIGN_SIGBUS:
+		align_ctl = 0;
+		break;
+	}
+
+	/* Enable interrupts in order to access user land. */
+	local_irq_enable();
+
+	/*
+	 * The fault came from kernel space. Two choices:
+	 * (a) unaligned_fixup < 1, we will first call get/put_user fixup
+	 *     to return -EFAULT. If no fixup, simply panic the kernel.
+	 * (b) unaligned_fixup >=1, we will try to fix the unaligned access
+	 *     if it was triggered by get_user/put_user() macros. Panic the
+	 *     kernel if it is not fixable.
+	 */
+
+	if (EX1_PL(regs->ex1) != USER_PL) {
+
+		if (align_ctl < 1) {
+			unaligned_fixup_count++;
+			/* If exception came from kernel, try fix it up. */
+			if (fixup_exception(regs)) {
+				if (unaligned_printk)
+					pr_info("Unalign fixup: %d %llx @%llx",
+						(int)unaligned_fixup,
+						(unsigned long long)regs->ex1,
+						(unsigned long long)regs->pc);
+				return;
+			}
+			/* Not fixable. Go panic. */
+			panic("Unalign exception in Kernel. pc=%lx",
+			      regs->pc);
+			return;
+		} else {
+			/*
+			 * Try to fix the exception. If we can't, panic the
+			 * kernel.
+			 */
+			bundle = GX_INSN_BSWAP(
+				*((tilegx_bundle_bits *)(regs->pc)));
+			jit_bundle_gen(regs, bundle, align_ctl);
+			return;
+		}
+	}
+
+	/*
+	 * Fault came from user with ICS or stack is not aligned.
+	 * If so, we will trigger SIGBUS.
+	 */
+	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
+		siginfo_t info = {
+			.si_signo = SIGBUS,
+			.si_code = BUS_ADRALN,
+			.si_addr = (unsigned char __user *)0
+		};
+
+		if (unaligned_printk)
+			pr_info("Unalign fixup: %d %llx @%llx",
+				(int)unaligned_fixup,
+				(unsigned long long)regs->ex1,
+				(unsigned long long)regs->pc);
+
+		unaligned_fixup_count++;
+
+		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
+		force_sig_info(info.si_signo, &info, current);
+		return;
+	}
+
+
+	/* Read the bundle that caused the exception! */
+	pc = (tilegx_bundle_bits __user *)(regs->pc);
+	if (get_user(bundle, pc) != 0) {
+		/* Probably never be here since pc is valid user address.*/
+		siginfo_t info = {
+			.si_signo = SIGSEGV,
+			.si_code = SEGV_MAPERR,
+			.si_addr = (void __user *)pc
+		};
+		pr_err("Couldn't read instruction at %p trying to step\n", pc);
+		trace_unhandled_signal("segfault in unalign fixup", regs,
+				       (unsigned long)info.si_addr, SIGSEGV);
+		force_sig_info(info.si_signo, &info, current);
+		return;
+	}
+
+	if (!ti->unalign_jit_base) {
+		void __user *user_page;
+
+		/*
+		 * Allocate a page in userland.
+		 * For 64-bit processes we try to place the mapping far
+		 * from anything else that might be going on (specifically
+		 * 64 GB below the top of the user address space).  If it
+		 * happens not to be possible to put it there, it's OK;
+		 * the kernel will choose another location and we'll
+		 * remember it for later.
+		 *
+		 * The pid is widened before shifting: shifting it as an
+		 * int can overflow once pid << PAGE_SHIFT exceeds INT_MAX.
+		 */
+		if (is_compat_task())
+			user_page = NULL;
+		else
+			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
+				((unsigned long)current->pid << PAGE_SHIFT);
+
+		user_page = (void __user *) vm_mmap(NULL,
+						    (unsigned long)user_page,
+						    PAGE_SIZE,
+						    PROT_EXEC | PROT_READ |
+						    PROT_WRITE,
+#ifdef CONFIG_HOMECACHE
+						    MAP_CACHE_HOME_TASK |
+#endif
+						    MAP_PRIVATE |
+						    MAP_ANONYMOUS,
+						    0);
+
+		if (IS_ERR((void __force *)user_page)) {
+			/*
+			 * NOTE(review): no signal is raised here, so the task
+			 * presumably returns to the same faulting bundle and
+			 * traps again -- confirm whether a SIGBUS is wanted.
+			 */
+			pr_err("Out of kernel pages trying do_mmap.\n");
+			return;
+		}
+
+		/* Save the address in the thread_info struct */
+		ti->unalign_jit_base = user_page;
+		if (unaligned_printk)
+			pr_info("Unalign bundle: %d:%d, allocate page @%llx",
+				raw_smp_processor_id(), current->pid,
+				(unsigned long long)user_page);
+	}
+
+	/* Generate unalign JIT */
+	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
+}
+
+#endif /* __tilegx__ */
diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c
new file mode 100644
index 00000000000..5af8debc6a7
--- /dev/null
+++ b/arch/tile/kernel/usb.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Register the Tile-Gx USB interfaces as platform devices.
+ *
+ * The actual USB driver is just some glue (in
+ * drivers/usb/host/[eo]hci-tilegx.c) which makes the registers available
+ * to the standard kernel EHCI and OHCI drivers.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/usb/tilegx.h>
+#include <linux/types.h>
+
+static u64 ehci_dmamask = DMA_BIT_MASK(32);
+
+#define USB_HOST_DEF(unit, type, dmamask) \
+	static struct \
+	    tilegx_usb_platform_data tilegx_usb_platform_data_ ## type ## \
+		hci ## unit = { \
+		.dev_index = unit, \
+	}; \
+	\
+	static struct platform_device tilegx_usb_ ## type ## hci ## unit = { \
+		.name = "tilegx-" #type "hci", \
+		.id = unit, \
+		.dev = { \
+			.dma_mask = dmamask, \
+			.coherent_dma_mask = DMA_BIT_MASK(32), \
+			.platform_data = \
+				&tilegx_usb_platform_data_ ## type ## hci ## \
+				unit, \
+		}, \
+	};
+
+USB_HOST_DEF(0, e, &ehci_dmamask)
+USB_HOST_DEF(0, o, NULL)
+USB_HOST_DEF(1, e, &ehci_dmamask)
+USB_HOST_DEF(1, o, NULL)
+
+#undef USB_HOST_DEF
+
+static struct platform_device *tilegx_usb_devices[] __initdata = {
+	&tilegx_usb_ehci0,
+	&tilegx_usb_ehci1,
+	&tilegx_usb_ohci0,
+	&tilegx_usb_ohci1,
+};
+
+/* Add our set of possible USB devices, reporting any registration error. */
+static int __init tilegx_usb_init(void)
+{
+	int ret = platform_add_devices(tilegx_usb_devices,
+				       ARRAY_SIZE(tilegx_usb_devices));
+
+	return ret;
+}
+arch_initcall(tilegx_usb_init);
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
new file mode 100644
index 00000000000..1533af24106
--- /dev/null
+++ b/arch/tile/kernel/vdso.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#include <asm/vdso.h>
+#include <asm/mman.h>
+#include <asm/sections.h>
+
+#include <arch/sim.h>
+
+/* The alignment of the vDSO. */
+#define VDSO_ALIGNMENT  PAGE_SIZE
+
+
+static unsigned int vdso_pages;
+static struct page **vdso_pagelist;
+
+#ifdef CONFIG_COMPAT
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+static int vdso_ready;
+
+/*
+ * The vdso data page.
+ */
+static union {
+	struct vdso_data	data;
+	u8			page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static unsigned int __read_mostly vdso_enabled = 1;
+
+/*
+ * Build the NULL-terminated page list for one vDSO image: the first
+ * (pages - 1) entries are the image pages starting at vdso_kbase, the
+ * last entry is the shared vdso_data page.
+ */
+static struct page **vdso_setup(void *vdso_kbase, unsigned int pages)
+{
+	unsigned int i;
+	struct page **pagelist;
+
+	/* kcalloc() zeroes the array and checks the size multiplication. */
+	pagelist = kcalloc(pages + 1, sizeof(*pagelist), GFP_KERNEL);
+	BUG_ON(pagelist == NULL);
+	for (i = 0; i < pages - 1; i++) {
+		struct page *pg = virt_to_page(vdso_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		pagelist[i] = pg;
+	}
+	pagelist[pages - 1] = virt_to_page(vdso_data);
+	pagelist[pages] = NULL;
+
+	return pagelist;
+}
+
+/* Prepare the page lists that setup_vdso_pages() maps into each process. */
+static int __init vdso_init(void)
+{
+	int data_pages = sizeof(vdso_data_store) >> PAGE_SHIFT;
+
+	/*
+	 * We can disable vDSO support generally, but we need to retain
+	 * one page to support the two-bundle (16-byte) rt_sigreturn path.
+	 */
+	if (!vdso_enabled) {
+		size_t offset = (unsigned long)&__vdso_rt_sigreturn;
+		static struct page *sigret_page;
+		sigret_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		BUG_ON(sigret_page == NULL);
+		vdso_pagelist = &sigret_page;
+		vdso_pages = 1;
+		/* All 16 copied bytes must land inside the single page. */
+		BUG_ON(offset + 16 > PAGE_SIZE);
+		memcpy(page_address(sigret_page) + offset,
+		       vdso_start + offset, 16);
+#ifdef CONFIG_COMPAT
+		vdso32_pages = vdso_pages;
+		vdso32_pagelist = vdso_pagelist;
+#endif
+		vdso_ready = 1;
+		return 0;
+	}
+
+	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+	vdso_pages += data_pages;
+	vdso_pagelist = vdso_setup(vdso_start, vdso_pages);
+
+#ifdef CONFIG_COMPAT
+	vdso32_pages = (vdso32_end - vdso32_start) >> PAGE_SHIFT;
+	vdso32_pages += data_pages;
+	vdso32_pagelist = vdso_setup(vdso32_start, vdso32_pages);
+#endif
+
+	/* Publish the page lists before the ready flag. */
+	smp_wmb();
+	vdso_ready = 1;
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == VDSO_BASE)
+		return "[vdso]";
+#ifndef __tilegx__
+	if (vma->vm_start == MEM_USER_INTRPT)
+		return "[intrpt]";
+#endif
+	return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+	return NULL;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long address)
+{
+	return 0;
+}
+
+int in_gate_area_no_mm(unsigned long address)
+{
+	return 0;
+}
+
+/*
+ * Map the vDSO pages into the current process.  Returns 0 on success or
+ * when there is nothing to map, otherwise the error from
+ * get_unmapped_area() or install_special_mapping().
+ */
+int setup_vdso_pages(void)
+{
+	struct page **pagelist;
+	unsigned long pages;
+	struct mm_struct *mm = current->mm;
+	unsigned long vdso_base = 0;
+	int retval = 0;
+
+	if (!vdso_ready)
+		return 0;
+
+	mm->context.vdso_base = 0;
+
+	pagelist = vdso_pagelist;
+	pages = vdso_pages;
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		pagelist = vdso32_pagelist;
+		pages = vdso32_pages;
+	}
+#endif
+
+	/*
+	 * vDSO has a problem and was disabled, just don't "enable" it for the
+	 * process.
+	 */
+	if (pages == 0)
+		return 0;
+
+	vdso_base = get_unmapped_area(NULL, vdso_base,
+				      (pages << PAGE_SHIFT) +
+				      ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
+				      0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		retval = vdso_base;
+		return retval;
+	}
+
+	/* Add required alignment. */
+	vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
+
+	/*
+	 * Put vDSO base into mm struct. We need to do this before calling
+	 * install_special_mapping or the perf counter mmap tracking code
+	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
+	 */
+	mm->context.vdso_base = vdso_base;
+
+	/*
+	 * our vma flags don't have VM_WRITE so by default, the process isn't
+	 * allowed to write those pages.
+	 * gdb can break that with ptrace interface, and thus trigger COW on
+	 * those pages but it's then your responsibility to never do that on
+	 * the "data" page of the vDSO or you'll stop getting kernel updates
+	 * and your nice userland gettimeofday will be totally dead.
+	 * It's fine to use that for setting breakpoints in the vDSO code
+	 * pages though
+	 */
+	retval = install_special_mapping(mm, vdso_base,
+					 pages << PAGE_SHIFT,
+					 VM_READ|VM_EXEC |
+					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+					 pagelist);
+	if (retval)
+		mm->context.vdso_base = 0;
+
+	return retval;
+}
+
+/*
+ * Parse "vdso=<n>" from the kernel command line.  A __setup() handler
+ * returns 1 once it has consumed its option; handing back kstrtouint()'s
+ * 0-on-success would report the option as not handled.
+ */
+static __init int vdso_func(char *s)
+{
+	if (kstrtouint(s, 0, &vdso_enabled))
+		pr_warn("vdso: ignoring malformed value '%s'\n", s);
+	return 1;
+}
+__setup("vdso=", vdso_func);
diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile
new file mode 100644
index 00000000000..a025f63d54c
--- /dev/null
+++ b/arch/tile/kernel/vdso/Makefile
@@ -0,0 +1,118 @@
+# Symbols present in the vdso
+vdso-syms = rt_sigreturn gettimeofday
+
+# Files to link into the vdso
+obj-vdso = $(patsubst %, v%.o, $(vdso-syms))
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+# vdso32 is only for tilegx -m32 compat task.
+VDSO32-$(CONFIG_COMPAT) := y
+
+obj-y += vdso.o
+obj-$(VDSO32-y) += vdso32.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+CFLAGS_REMOVE_vdso.o = -pg
+CFLAGS_REMOVE_vdso32.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn32.o = -pg
+CFLAGS_REMOVE_vgettimeofday.o = -pg
+CFLAGS_REMOVE_vgettimeofday32.o = -pg
+
+ifdef CONFIG_FEEDBACK_COLLECT
+# vDSO code runs in userspace, not collecting feedback data.
+# Append, so the -pg removals above stay in effect.
+CFLAGS_REMOVE_vdso.o += -ffeedback-generate
+CFLAGS_REMOVE_vdso32.o += -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn.o += -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn32.o += -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday.o += -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday32.o += -ffeedback-generate
+endif
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency
+$(obj)/vdso.o: $(obj)/vdso.so
+
+# link rule for the .so file, .lds has to be first
+# (this SYSCFLAGS value is replaced by the assignment further down)
+SYSCFLAGS_vdso.so.dbg = $(c_flags)
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+	$(call if_changed,vdsold)
+
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO.  With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vdso-syms.o
+$(obj)/built-in.o: $(obj)/vdso-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+			    $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+# The name must match $(@F) of the target (vdso-syms.o) for cmd_vdsold
+# to pick it up via $(SYSCFLAGS_$(@F)).
+SYSCFLAGS_vdso-syms.o = -r
+$(obj)/vdso-syms.o: $(src)/vdso.lds $(obj)/vrt_sigreturn.o FORCE
+	$(call if_changed,vdsold)
+
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions.
+# Make sure only to export the intended __vdso_xxx symbol offsets.
+quiet_cmd_vdsold = VDSOLD  $@
+      cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \
+                           -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \
+                   $(CROSS_COMPILE)objcopy \
+                           $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+# Create the directory here too, so vdso32_install works on its own.
+vdso32.so: $(obj)/vdso32.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso.so
+vdso32_install: vdso32.so
+
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_32 += -m32 -s
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 += -m32 -fPIC -shared
+
+obj-vdso32 = $(patsubst %, v%32.o, $(vdso-syms))
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+targets += $(obj-vdso32) vdso32.so vdso32.so.dbg
+
+$(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c
+	$(call if_changed_rule,cc_o_c)
+
+$(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S
+	$(call if_changed,as_o_S)
+
+# Force dependency
+$(obj)/vdso32.o: $(obj)/vdso32.so
+
+SYSCFLAGS_vdso32.so.dbg = -m32 -shared -s -Wl,-soname=linux-vdso32.so.1 \
+			    $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/vdso32.so.dbg: $(src)/vdso.lds $(obj-vdso32)
+	$(call if_changed,vdsold)
diff --git a/arch/tile/kernel/vdso/vdso.S b/arch/tile/kernel/vdso/vdso.S
new file mode 100644
index 00000000000..3467adb4163
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .global vdso_start, vdso_end + .align PAGE_SIZE +vdso_start: + .incbin "arch/tile/kernel/vdso/vdso.so" + .align PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S new file mode 100644 index 00000000000..041cd6c39c8 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso.lds.S @@ -0,0 +1,87 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#define VDSO_VERSION_STRING LINUX_2.6 + + +OUTPUT_ARCH(tile) + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__vdso_rt_sigreturn); + + +SECTIONS +{ + . 
= SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + + /* + * This linker script is used both with -r and with -shared. + * For the layouts to match, we need to skip more than enough + * space for the dynamic symbol table et al. If this amount + * is insufficient, ld -shared will barf. Just increase it here. + */ + . = 0x1000; + .text : { *(.text .text.*) } :text + + .data : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } +} + + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what userland symbols we export from the vDSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __vdso_rt_sigreturn; + __vdso_gettimeofday; + gettimeofday; + local:*; + }; +} diff --git a/arch/tile/kernel/vdso/vdso32.S b/arch/tile/kernel/vdso/vdso32.S new file mode 100644 index 00000000000..1d1ac3257e1 --- /dev/null +++ b/arch/tile/kernel/vdso/vdso32.S @@ -0,0 +1,28 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .global vdso32_start, vdso32_end + .align PAGE_SIZE +vdso32_start: + .incbin "arch/tile/kernel/vdso/vdso32.so" + .align PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c new file mode 100644 index 00000000000..51ec8e46f5f --- /dev/null +++ b/arch/tile/kernel/vdso/vgettimeofday.c @@ -0,0 +1,107 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */
+
+#define VDSO_BUILD  /* avoid some shift warnings for -m32 in <asm/page.h> */
+#include <linux/time.h>
+#include <asm/timex.h>
+#include <asm/vdso.h>
+
+#if CHIP_HAS_SPLIT_CYCLE()
+/*
+ * Read the cycle counter from its two SPR halves, re-reading until the
+ * high half is the same before and after the low half was sampled.
+ */
+static inline cycles_t get_cycles_inline(void)
+{
+	unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
+	unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
+	unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+
+	while (unlikely(high != high2)) {
+		low = __insn_mfspr(SPR_CYCLE_LOW);
+		high = high2;
+		high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+	}
+
+	return (((cycles_t)high) << 32) | low;
+}
+#define get_cycles get_cycles_inline
+#endif
+
+/*
+ * Find out the vDSO data page address in the process address space.
+ */
+inline unsigned long get_datapage(void)
+{
+	unsigned long ret;
+
+	/* vdso data page located in the 2nd vDSO page. */
+	asm volatile ("lnk %0" : "=r"(ret));
+	ret &= ~(PAGE_SIZE - 1);
+	ret += PAGE_SIZE;
+
+	return ret;
+}
+
+/*
+ * Userspace gettimeofday(): fill *tz and/or *tv from the vDSO data page.
+ * Either pointer may be NULL.  Always returns 0.
+ */
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	cycles_t cycles;
+	unsigned long count, sec, ns;
+	volatile struct vdso_data *vdso_data;
+
+	vdso_data = (struct vdso_data *)get_datapage();
+	/* The use of the timezone is obsolete, normally tz is NULL. */
+	if (unlikely(tz != NULL)) {
+		while (1) {
+			/* Spin until the update finish. */
+			count = vdso_data->tz_update_count;
+			if (count & 1)
+				continue;
+
+			tz->tz_minuteswest = vdso_data->tz_minuteswest;
+			tz->tz_dsttime = vdso_data->tz_dsttime;
+
+			/* Check whether updated, read again if so. */
+			if (count == vdso_data->tz_update_count)
+				break;
+		}
+	}
+
+	if (unlikely(tv == NULL))
+		return 0;
+
+	while (1) {
+		/* Spin until the update finish. */
+		count = vdso_data->tb_update_count;
+		if (count & 1)
+			continue;
+
+		cycles = (get_cycles() - vdso_data->xtime_tod_stamp);
+		ns = (cycles * vdso_data->mult) >> vdso_data->shift;
+		sec = vdso_data->xtime_clock_sec;
+		ns += vdso_data->xtime_clock_nsec;
+		/*
+		 * More than one second may have elapsed since the data page
+		 * was last updated, so carry every whole second, not just
+		 * one; otherwise tv_usec could end up >= 1000000.
+		 */
+		while (ns >= NSEC_PER_SEC) {
+			ns -= NSEC_PER_SEC;
+			sec += 1;
+		}
+
+		/* Check whether updated, read again if so. */
+		if (count == vdso_data->tb_update_count)
+			break;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = ns / 1000;
+
+	return 0;
+}
+
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+	__attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/tile/kernel/vdso/vrt_sigreturn.S b/arch/tile/kernel/vdso/vrt_sigreturn.S
new file mode 100644
index 00000000000..6326caf4a03
--- /dev/null
+++ b/arch/tile/kernel/vdso/vrt_sigreturn.S
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/linkage.h>
+#include <arch/abi.h>
+#include <asm/unistd.h>
+
+/*
+ * Note that libc has a copy of this function that it uses to compare
+ * against the PC when a stack backtrace ends, so if this code is
+ * changed, the libc implementation(s) should also be updated.
+ */
+ENTRY(__vdso_rt_sigreturn)
+	moveli TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn
+	swint1
+	/* We don't use ENDPROC to avoid tagging this symbol as FUNC,
+	 * which confuses the perf tool.
+ */ + END(__vdso_rt_sigreturn) diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 38f64fafdc1..f1819423ffc 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -5,7 +5,7 @@ #include <hv/hypervisor.h> /* Text loads starting from the supervisor interrupt vector address. */ -#define TEXT_OFFSET MEM_SV_INTRPT +#define TEXT_OFFSET MEM_SV_START OUTPUT_ARCH(tile) ENTRY(_start) @@ -13,7 +13,7 @@ jiffies = jiffies_64; PHDRS { - intrpt1 PT_LOAD ; + intrpt PT_LOAD ; text PT_LOAD ; data PT_LOAD ; } @@ -24,23 +24,30 @@ SECTIONS #define LOAD_OFFSET TEXT_OFFSET /* Interrupt vectors */ - .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ + .intrpt (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */ { _text = .; - _stext = .; - *(.intrpt1) - } :intrpt1 =0 + *(.intrpt) + } :intrpt =0 /* Hypervisor call vectors */ - #include "hvglue.lds" + . = ALIGN(0x10000); + .hvglue : AT (ADDR(.hvglue) - LOAD_OFFSET) { + *(.hvglue) + } :NONE /* Now the real code */ . = ALIGN(0x20000); + _stext = .; .text : AT (ADDR(.text) - LOAD_OFFSET) { HEAD_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT __fix_text_end = .; /* tile-cpack won't rearrange before this */ + ALIGN_FUNCTION(); + *(.hottext*) TEXT_TEXT *(.text.*) *(.coldtext*) @@ -58,27 +65,17 @@ SECTIONS #define LOAD_OFFSET PAGE_OFFSET . = ALIGN(PAGE_SIZE); + __init_begin = .; VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 - PERCPU(L2_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; + __init_end = .; _sdata = .; /* Start of data section */ - RO_DATA_SECTION(PAGE_SIZE) - - /* initially writeable, then read-only */ - . 
= ALIGN(PAGE_SIZE); - __w1data_begin = .; - .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) { - VMLINUX_SYMBOL(__w1data_begin) = .; - *(.w1data) - VMLINUX_SYMBOL(__w1data_end) = .; - } - RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - _edata = .; EXCEPTION_TABLE(L2_CACHE_BYTES) diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig index b88f9c04778..2298cb1daff 100644 --- a/arch/tile/kvm/Kconfig +++ b/arch/tile/kvm/Kconfig @@ -18,7 +18,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && MODULES && EXPERIMENTAL + depends on HAVE_KVM && MODULES select PREEMPT_NOTIFIERS select ANON_INODES ---help--- @@ -33,6 +33,5 @@ config KVM If unsure, say N. source drivers/vhost/Kconfig -source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086ecbe..c4211cbb202 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -4,14 +4,15 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ - strchr_$(BITS).o strlen_$(BITS).o - -ifeq ($(CONFIG_TILEGX),y) -lib-y += memcpy_user_64.o -else -lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o -endif + strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o +lib-$(CONFIG_TILEGX) += memcpy_user_64.o +lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o obj-$(CONFIG_MODULES) += exports.o + +# The finv_buffer_remote() and copy_{to,from}_user() routines can't +# have -pg added, since they both rely on being leaf functions. 
+CFLAGS_REMOVE_cacheflush.o = -pg +CFLAGS_REMOVE_memcpy_user_64.o = -pg diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 46570211df5..c89b211fd9e 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -17,54 +17,15 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/mm.h> -#include <asm/atomic.h> -#include <asm/futex.h> +#include <linux/atomic.h> #include <arch/chip.h> -/* See <asm/atomic_32.h> */ -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - -/* - * A block of memory containing locks for atomic ops. Each instance of this - * struct will be homed on a different CPU. - */ -struct atomic_locks_on_cpu { - int lock[ATOMIC_HASH_L2_SIZE]; -} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4))); - -static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool); - -/* The locks we'll use until __init_atomic_per_cpu is called. */ -static struct atomic_locks_on_cpu __initdata initial_atomic_locks; - -/* Hash into this vector to get a pointer to lock for the given atomic. */ -struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] - __write_once = { - [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks) -}; - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* This page is remapped on startup to be hash-for-home. */ int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - -static inline int *__atomic_hashed_lock(volatile void *v) +int *__atomic_hashed_lock(volatile void *v) { /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - unsigned long i = - (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); - unsigned long n = __insn_crc32_32(0, i); - - /* Grab high bits for L1 index. */ - unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT); - /* Grab low bits for L2 index. 
*/ - unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1); - - return &atomic_lock_ptr[l1_index]->lock[l2_index]; -#else /* * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index. * Using mm works here because atomic_locks is page aligned. @@ -73,26 +34,13 @@ static inline int *__atomic_hashed_lock(volatile void *v) (unsigned long)atomic_locks, 2, (ATOMIC_HASH_SHIFT + 2) - 1); return (int *)ptr; -#endif } #ifdef CONFIG_SMP /* Return whether the passed pointer is a valid atomic lock pointer. */ static int is_atomic_lock(int *p) { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - int i; - for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { - - if (p >= &atomic_lock_ptr[i]->lock[0] && - p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) { - return 1; - } - } - return 0; -#else return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE]; -#endif } void __atomic_fault_unlock(int *irqlock_word) @@ -111,33 +59,32 @@ static inline int *__atomic_setup(volatile void *v) return __atomic_hashed_lock(v); } -int _atomic_xchg(atomic_t *v, int n) +int _atomic_xchg(int *v, int n) { - return __atomic_xchg(&v->counter, __atomic_setup(v), n).val; + return __atomic_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); -int _atomic_xchg_add(atomic_t *v, int i) +int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val; + return __atomic_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); -int _atomic_xchg_add_unless(atomic_t *v, int a, int u) +int _atomic_xchg_add_unless(int *v, int a, int u) { /* * Note: argument order is switched here since it is easier * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). 
*/ - return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a) - .val; + return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); -int _atomic_cmpxchg(atomic_t *v, int o, int n) +int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val; + return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); @@ -160,78 +107,36 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) EXPORT_SYMBOL(_atomic_xor); -u64 _atomic64_xchg(atomic64_t *v, u64 n) +long long _atomic64_xchg(long long *v, long long n) { - return __atomic64_xchg(&v->counter, __atomic_setup(v), n); + return __atomic64_xchg(v, __atomic_setup(v), n); } EXPORT_SYMBOL(_atomic64_xchg); -u64 _atomic64_xchg_add(atomic64_t *v, u64 i) +long long _atomic64_xchg_add(long long *v, long long i) { - return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i); + return __atomic64_xchg_add(v, __atomic_setup(v), i); } EXPORT_SYMBOL(_atomic64_xchg_add); -u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u) +long long _atomic64_xchg_add_unless(long long *v, long long a, long long u) { /* * Note: argument order is switched here since it is easier * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). */ - return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v), - u, a); + return __atomic64_xchg_add_unless(v, __atomic_setup(v), u, a); } EXPORT_SYMBOL(_atomic64_xchg_add_unless); -u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n) +long long _atomic64_cmpxchg(long long *v, long long o, long long n) { - return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n); + return __atomic64_cmpxchg(v, __atomic_setup(v), o, n); } EXPORT_SYMBOL(_atomic64_cmpxchg); -static inline int *__futex_setup(int __user *v) -{ - /* - * Issue a prefetch to the counter to bring it into cache. 
- * As for __atomic_setup, but we can't do a read into the L1 - * since it might fault; instead we do a prefetch into the L2. - */ - __insn_prefetch(v); - return __atomic_hashed_lock((int __force *)v); -} - -struct __get_user futex_set(u32 __user *v, int i) -{ - return __atomic_xchg((int __force *)v, __futex_setup(v), i); -} - -struct __get_user futex_add(u32 __user *v, int n) -{ - return __atomic_xchg_add((int __force *)v, __futex_setup(v), n); -} - -struct __get_user futex_or(u32 __user *v, int n) -{ - return __atomic_or((int __force *)v, __futex_setup(v), n); -} - -struct __get_user futex_andn(u32 __user *v, int n) -{ - return __atomic_andn((int __force *)v, __futex_setup(v), n); -} - -struct __get_user futex_xor(u32 __user *v, int n) -{ - return __atomic_xor((int __force *)v, __futex_setup(v), n); -} - -struct __get_user futex_cmpxchg(u32 __user *v, int o, int n) -{ - return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n); -} - /* * If any of the atomic or futex routines hit a bad address (not in * the page tables at kernel PL) this routine is called. The futex @@ -250,54 +155,8 @@ struct __get_user __atomic_bad_address(int __user *addr) } -#if CHIP_HAS_CBOX_HOME_MAP() -static int __init noatomichash(char *str) -{ - pr_warning("noatomichash is deprecated.\n"); - return 1; -} -__setup("noatomichash", noatomichash); -#endif - void __init __init_atomic_per_cpu(void) { -#if ATOMIC_LOCKS_FOUND_VIA_TABLE() - - unsigned int i; - int actual_cpu; - - /* - * Before this is called from setup, we just have one lock for - * all atomic objects/operations. Here we replace the - * elements of atomic_lock_ptr so that they point at per_cpu - * integers. This seemingly over-complex approach stems from - * the fact that DEFINE_PER_CPU defines an entry for each cpu - * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. 
But - * for efficient hashing of atomics to their locks we want a - * compile time constant power of 2 for the size of this - * table, so we use ATOMIC_HASH_SIZE. - * - * Here we populate atomic_lock_ptr from the per cpu - * atomic_lock_pool, interspersing by actual cpu so that - * subsequent elements are homed on consecutive cpus. - */ - - actual_cpu = cpumask_first(cpu_possible_mask); - - for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) { - /* - * Preincrement to slightly bias against using cpu 0, - * which has plenty of stuff homed on it already. - */ - actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask); - if (actual_cpu >= nr_cpu_ids) - actual_cpu = cpumask_first(cpu_possible_mask); - - atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu); - } - -#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - /* Validate power-of-two and "bigger than cpus" assumption */ BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids); @@ -321,9 +180,4 @@ void __init __init_atomic_per_cpu(void) * That should not produce more indices than ATOMIC_HASH_SIZE. */ BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); - -#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ - - /* The futex code makes this assumption, so we validate it here. */ - BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); } diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 82f64cc6365..6bda3132cd6 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -59,7 +59,7 @@ * bad kernel addresses). * * Note that if the value we would store is the same as what we - * loaded, we bypass the load. Other platforms with true atomics can + * loaded, we bypass the store. Other platforms with true atomics can * make the guarantee that a non-atomic __clear_bit(), for example, * can safely race with an atomic test_and_set_bit(); this example is * from bit_spinlock.h in slub_lock() / slub_unlock(). 
We can't do @@ -70,7 +70,7 @@ */ #include <linux/linkage.h> -#include <asm/atomic.h> +#include <asm/atomic_32.h> #include <asm/page.h> #include <asm/processor.h> @@ -164,6 +164,7 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) STD_ENDPROC(__atomic\name) .ifc \bitwidth,32 .pushsection __ex_table,"a" + .align 4 .word 1b, __atomic\name .word 2b, __atomic\name .word __atomic\name, __atomic_bad_address diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 35c1d8ca5f3..9c0ec22009a 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -12,9 +12,11 @@ * more details. */ +#include <linux/export.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <arch/icache.h> +#include <arch/spr_def.h> void __flush_icache_range(unsigned long start, unsigned long end) @@ -34,11 +36,38 @@ static inline void force_load(char *p) * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting * until the memory controller holds the flushed values. */ -void finv_buffer_remote(void *buffer, size_t size, int hfh) +void __attribute__((optimize("omit-frame-pointer"))) +finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else const unsigned long STRIPE_WIDTH = 8192; +#endif + +#ifdef __tilegx__ + /* + * On TILE-Gx, we must disable the dstream prefetcher before doing + * a cache flush; otherwise, we could end up with data in the cache + * that we don't want there. 
Note that normally we'd do an mf + * after the SPR write to disabling the prefetcher, but we do one + * below, before any further loads, so there's no need to do it + * here. + */ + uint_reg_t old_dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); + __insn_mtspr(SPR_DSTREAM_PF, 0); +#endif /* * Flush and invalidate the buffer out of the local L1/L2 @@ -61,7 +90,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * memory, that one load would be sufficient, but since we may * be, we also need to back up to the last load issued to * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping + * we crossed a "striping" boundary (the granularity of striping * across memory controllers). Keep backing up and doing this * until we are before the beginning of the buffer, or have * hit all the controllers. @@ -75,12 +104,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * every cache line on a full memory stripe on each * controller" that we simply do that, to simplify the logic. * - * FIXME: See bug 9535 for some issues with this code. + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). */ if (hfh) { step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * (1 << CHIP_LOG_NUM_MSHIMS()); +#endif } else { step_size = STRIPE_WIDTH; load_count = (1 << CHIP_LOG_NUM_MSHIMS()); @@ -96,7 +135,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) /* Figure out how far back we need to go. 
*/ base = p - (step_size * (load_count - 2)); - if ((long)base < (long)buffer) + if ((unsigned long)base < (unsigned long)buffer) base = buffer; /* @@ -109,17 +148,26 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) force_load(p); /* - * Repeat, but with inv's instead of loads, to get rid of the + * Repeat, but with finv's instead of loads, to get rid of the * data we just loaded into our own cache and the old home L3. - * No need to unroll since inv's don't target a register. + * No need to unroll since finv's don't target a register. + * The finv's are guaranteed not to actually flush the data in + * the buffer back to their home, since we just read it, so the + * lines are clean in cache; we will only invalidate those lines. */ p = (char *)buffer + size - 1; - __insn_inv(p); + __insn_finv(p); p -= step_size; p = (char *)((unsigned long)p | (step_size - 1)); for (; p >= base; p -= step_size) - __insn_inv(p); + __insn_finv(p); - /* Wait for the load+inv's (and thus finvs) to have completed. */ + /* Wait for these finv's (and thus the first finvs) to be done. */ __insn_mf(); + +#ifdef __tilegx__ + /* Reenable the prefetcher. 
*/ + __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf); +#endif } +EXPORT_SYMBOL_GPL(finv_buffer_remote); diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c index e4bab5bd3f3..c3ca3e64d9d 100644 --- a/arch/tile/lib/checksum.c +++ b/arch/tile/lib/checksum.c @@ -16,19 +16,6 @@ #include <net/checksum.h> #include <linux/module.h> -static inline unsigned int longto16(unsigned long x) -{ - unsigned long ret; -#ifdef __tilegx__ - ret = __insn_v2sadu(x, 0); - ret = __insn_v2sadu(ret, 0); -#else - ret = __insn_sadh_u(x, 0); - ret = __insn_sadh_u(ret, 0); -#endif - return ret; -} - __wsum do_csum(const unsigned char *buff, int len) { int odd, count; @@ -94,7 +81,7 @@ __wsum do_csum(const unsigned char *buff, int len) } if (len & 1) result += *buff; - result = longto16(result); + result = csum_long(result); if (odd) result = swab16(result); out: diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c index fdc403614d1..75947edccb2 100644 --- a/arch/tile/lib/cpumask.c +++ b/arch/tile/lib/cpumask.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/errno.h> #include <linux/smp.h> +#include <linux/export.h> /* * Allow cropping out bits beyond the end of the array. 
@@ -50,3 +51,4 @@ int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits) } while (*bp != '\0' && *bp != '\n'); return 0; } +EXPORT_SYMBOL(bitmap_parselist_crop); diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 49284fae9d0..82733c87d67 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -18,19 +18,10 @@ /* arch/tile/lib/usercopy.S */ #include <linux/uaccess.h> -EXPORT_SYMBOL(__get_user_1); -EXPORT_SYMBOL(__get_user_2); -EXPORT_SYMBOL(__get_user_4); -EXPORT_SYMBOL(__get_user_8); -EXPORT_SYMBOL(__put_user_1); -EXPORT_SYMBOL(__put_user_2); -EXPORT_SYMBOL(__put_user_4); -EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strnlen_user_asm); EXPORT_SYMBOL(strncpy_from_user_asm); EXPORT_SYMBOL(clear_user_asm); EXPORT_SYMBOL(flush_user_asm); -EXPORT_SYMBOL(inv_user_asm); EXPORT_SYMBOL(finv_user_asm); /* arch/tile/kernel/entry.S */ @@ -39,6 +30,15 @@ EXPORT_SYMBOL(finv_user_asm); EXPORT_SYMBOL(current_text_addr); EXPORT_SYMBOL(dump_stack); +/* arch/tile/kernel/head.S */ +EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_FUNCTION_TRACER +/* arch/tile/kernel/mcount_64.S */ +#include <asm/ftrace.h> +EXPORT_SYMBOL(__mcount); +#endif /* CONFIG_FUNCTION_TRACER */ + /* arch/tile/lib/, various memcpy files */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__copy_to_user_inatomic); @@ -60,6 +60,8 @@ EXPORT_SYMBOL(hv_dev_poll_cancel); EXPORT_SYMBOL(hv_dev_close); EXPORT_SYMBOL(hv_sysconf); EXPORT_SYMBOL(hv_confstr); +EXPORT_SYMBOL(hv_get_rtc); +EXPORT_SYMBOL(hv_set_rtc); /* libgcc.a */ uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); @@ -79,8 +81,6 @@ EXPORT_SYMBOL(__umoddi3); int64_t __moddi3(int64_t dividend, int64_t divisor); EXPORT_SYMBOL(__moddi3); #ifndef __tilegx__ -uint64_t __ll_mul(uint64_t n0, uint64_t n1); -EXPORT_SYMBOL(__ll_mul); int64_t __muldi3(int64_t, int64_t); EXPORT_SYMBOL(__muldi3); uint64_t __lshrdi3(uint64_t, unsigned int); @@ -89,4 +89,6 @@ uint64_t __ashrdi3(uint64_t, unsigned int); EXPORT_SYMBOL(__ashrdi3); 
uint64_t __ashldi3(uint64_t, unsigned int); EXPORT_SYMBOL(__ashldi3); +int __ffsdi2(uint64_t); +EXPORT_SYMBOL(__ffsdi2); #endif diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c new file mode 100644 index 00000000000..f8196b3a950 --- /dev/null +++ b/arch/tile/lib/memchr_64.c @@ -0,0 +1,69 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include "string-endian.h" + +void *memchr(const void *s, int c, size_t n) +{ + const uint64_t *last_word_ptr; + const uint64_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint64_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + + /* Get an aligned pointer. */ + s_int = (uintptr_t) s; + p = (const uint64_t *)(s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + goal = copy_byte(c); + + /* Read the first word, but munge it so that bytes before the array + * will not match goal. + */ + before_mask = MASK(s_int); + v = (*p | before_mask) ^ (goal & before_mask); + + /* Compute the address of the last byte. */ + last_byte_ptr = (const char *)s + n - 1; + + /* Compute the address of the word containing the last byte. 
*/ + last_word_ptr = (const uint64_t *)((uintptr_t) last_byte_ptr & -8); + + while ((bits = __insn_v1cmpeq(v, goal)) == 0) { + if (__builtin_expect(p == last_word_ptr, 0)) { + /* We already read the last word in the array, + * so give up. + */ + return NULL; + } + v = *++p; + } + + /* We found a match, but it might be in a byte past the end + * of the array. + */ + ret = ((char *)p) + (CFZ(bits) >> 3); + return (ret <= last_byte_ptr) ? ret : NULL; +} +EXPORT_SYMBOL(memchr); diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index 2a419a6122d..a2771ae5da5 100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S @@ -22,14 +22,6 @@ #include <linux/linkage.h> -/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() -#define memcpy __memcpy_asm -#define __copy_to_user_inatomic __copy_to_user_inatomic_asm -#define __copy_from_user_inatomic __copy_from_user_inatomic_asm -#define __copy_from_user_zeroing __copy_from_user_zeroing_asm -#endif - #define IS_MEMCPY 0 #define IS_COPY_FROM_USER 1 #define IS_COPY_FROM_USER_ZEROING 2 @@ -44,6 +36,7 @@ */ #define EX \ .pushsection __ex_table, "a"; \ + .align 4; \ .word 9f, memcpy_common_fixup; \ .popsection; \ 9 @@ -158,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } { addi r3, r1, 60; andi r9, r9, -64 } -#if CHIP_HAS_WH64() /* No need to prefetch dst, we'll just do the wh64 * right before we copy a line. */ -#endif - EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, .; move r27, lr } @@ -171,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } EX: { lw r7, r3; addi r3, r3, 64 } -#if !CHIP_HAS_WH64() - /* Prefetch the dest */ - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - /* Use a real load to cause a TLB miss if necessary. 
We aren't using - * r28, so this should be fine. - */ -EX: { lw r28, r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } -#endif /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bz zero, .Lbig_loop2 } @@ -286,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 } /* Fill second L1D line. */ EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ -#if CHIP_HAS_WH64() /* Prepare destination line for writing. */ EX: { wh64 r9; addi r9, r9, 64 } -#else - /* Prefetch dest line */ - { prefetch r9; addi r9, r9, 64 } -#endif /* Load seven words that are L1D hits to cover wh64 L2 usage. */ /* Load the three remaining words from the last L1D line, which @@ -330,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ -#if CHIP_HAS_WH64() EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ -#else - /* Back up the r9 to a cache line we are already storing to - * if it gets past the end of the dest vector. Strictly speaking, - * we don't need to back up to the start of a cache line, but it's free - * and tidy, so why not? - */ -EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ -#endif /* Store second L1D line. 
*/ EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ @@ -403,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 } .Ldest_is_word_aligned: -#if CHIP_HAS_DWORD_ALIGN() EX: { andi r8, r0, 63; lwadd_na r6, r1, 4} { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned } @@ -511,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 } /* Move r1 back to the point where it corresponds to r0. */ { addi r1, r1, -4 } -#else /* !CHIP_HAS_DWORD_ALIGN() */ - - /* Compute right/left shift counts and load initial source words. */ - { andi r5, r1, -4; andi r3, r1, 3 } -EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 } -EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 } - - /* Load and store one word at a time, using shifts and ORs - * to correct for the misaligned src. - */ -.Lcopy_unaligned_src_loop: - { shr r6, r6, r3; shl r8, r7, r4 } -EX: { lw r7, r5; or r8, r8, r6; move r6, r7 } -EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 } - { addi r5, r5, 4; slti_u r8, r2, 8 } - { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 } - - { bz r2, .Lcopy_unaligned_done } -#endif /* !CHIP_HAS_DWORD_ALIGN() */ - /* Fall through */ /* @@ -614,5 +554,6 @@ memcpy_fixup_loop: .size memcpy_common_fixup, . - memcpy_common_fixup .section __ex_table,"a" + .align 4 .word .Lcfu, .Lcopy_from_user_fixup_zero_remainder .word .Lctu, .Lcopy_to_user_fixup_done diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c new file mode 100644 index 00000000000..4815354b8cd --- /dev/null +++ b/arch/tile/lib/memcpy_64.c @@ -0,0 +1,367 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ + +/* Must be 8 bytes in size. */ +#define op_t uint64_t + +/* Threshold value for when to enter the unrolled loops. */ +#define OP_T_THRES 16 + +#if CHIP_L2_LINE_SIZE() != 64 +#error "Assumes 64 byte line size" +#endif + +/* How many cache lines ahead should we prefetch? */ +#define PREFETCH_LINES_AHEAD 4 + +/* + * Provide "base versions" of load and store for the normal code path. + * The kernel provides other versions for userspace copies. + */ +#define ST(p, v) (*(p) = (v)) +#define LD(p) (*(p)) + +#ifndef USERCOPY_FUNC +#define ST1 ST +#define ST2 ST +#define ST4 ST +#define ST8 ST +#define LD1 LD +#define LD2 LD +#define LD4 LD +#define LD8 LD +#define RETVAL dstv +void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n) +#else +/* + * Special kernel version will provide implementation of the LDn/STn + * macros to return a count of uncopied bytes due to mm fault. + */ +#define RETVAL 0 +int __attribute__((optimize("omit-frame-pointer"))) +USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n) +#endif +{ + char *__restrict dst1 = (char *)dstv; + const char *__restrict src1 = (const char *)srcv; + const char *__restrict src1_end; + const char *__restrict prefetch; + op_t *__restrict dst8; /* 8-byte pointer to destination memory. */ + op_t final; /* Final bytes to write to trailing word, if any */ + long i; + + if (n < 16) { + for (; n; n--) + ST1(dst1++, LD1(src1++)); + return RETVAL; + } + + /* + * Locate the end of source memory we will copy. Don't + * prefetch past this. 
+ */ + src1_end = src1 + n - 1; + + /* Prefetch ahead a few cache lines, but not past the end. */ + prefetch = src1; + for (i = 0; i < PREFETCH_LINES_AHEAD; i++) { + __insn_prefetch(prefetch); + prefetch += CHIP_L2_LINE_SIZE(); + prefetch = (prefetch < src1_end) ? prefetch : src1; + } + + /* Copy bytes until dst is word-aligned. */ + for (; (uintptr_t)dst1 & (sizeof(op_t) - 1); n--) + ST1(dst1++, LD1(src1++)); + + /* 8-byte pointer to destination memory. */ + dst8 = (op_t *)dst1; + + if (__builtin_expect((uintptr_t)src1 & (sizeof(op_t) - 1), 0)) { + /* Unaligned copy. */ + + op_t tmp0 = 0, tmp1 = 0, tmp2, tmp3; + const op_t *src8 = (const op_t *) ((uintptr_t)src1 & + -sizeof(op_t)); + const void *srci = (void *)src1; + int m; + + m = (CHIP_L2_LINE_SIZE() << 2) - + (((uintptr_t)dst8) & ((CHIP_L2_LINE_SIZE() << 2) - 1)); + m = (n < m) ? n : m; + m /= sizeof(op_t); + + /* Copy until 'dst' is cache-line-aligned. */ + n -= (sizeof(op_t) * m); + + switch (m % 4) { + case 0: + if (__builtin_expect(!m, 0)) + goto _M0; + tmp1 = LD8(src8++); + tmp2 = LD8(src8++); + goto _8B3; + case 2: + m += 2; + tmp3 = LD8(src8++); + tmp0 = LD8(src8++); + goto _8B1; + case 3: + m += 1; + tmp2 = LD8(src8++); + tmp3 = LD8(src8++); + goto _8B2; + case 1: + m--; + tmp0 = LD8(src8++); + tmp1 = LD8(src8++); + if (__builtin_expect(!m, 0)) + goto _8B0; + } + + do { + tmp2 = LD8(src8++); + tmp0 = __insn_dblalign(tmp0, tmp1, srci); + ST8(dst8++, tmp0); +_8B3: + tmp3 = LD8(src8++); + tmp1 = __insn_dblalign(tmp1, tmp2, srci); + ST8(dst8++, tmp1); +_8B2: + tmp0 = LD8(src8++); + tmp2 = __insn_dblalign(tmp2, tmp3, srci); + ST8(dst8++, tmp2); +_8B1: + tmp1 = LD8(src8++); + tmp3 = __insn_dblalign(tmp3, tmp0, srci); + ST8(dst8++, tmp3); + m -= 4; + } while (m); + +_8B0: + tmp0 = __insn_dblalign(tmp0, tmp1, srci); + ST8(dst8++, tmp0); + src8--; + +_M0: + if (__builtin_expect(n >= CHIP_L2_LINE_SIZE(), 0)) { + op_t tmp4, tmp5, tmp6, tmp7, tmp8; + + prefetch = ((const char *)src8) + + CHIP_L2_LINE_SIZE() * 
PREFETCH_LINES_AHEAD; + + for (tmp0 = LD8(src8++); n >= CHIP_L2_LINE_SIZE(); + n -= CHIP_L2_LINE_SIZE()) { + /* Prefetch and advance to next line to + prefetch, but don't go past the end. */ + __insn_prefetch(prefetch); + + /* Make sure prefetch got scheduled + earlier. */ + __asm__ ("" : : : "memory"); + + prefetch += CHIP_L2_LINE_SIZE(); + prefetch = (prefetch < src1_end) ? prefetch : + (const char *) src8; + + tmp1 = LD8(src8++); + tmp2 = LD8(src8++); + tmp3 = LD8(src8++); + tmp4 = LD8(src8++); + tmp5 = LD8(src8++); + tmp6 = LD8(src8++); + tmp7 = LD8(src8++); + tmp8 = LD8(src8++); + + tmp0 = __insn_dblalign(tmp0, tmp1, srci); + tmp1 = __insn_dblalign(tmp1, tmp2, srci); + tmp2 = __insn_dblalign(tmp2, tmp3, srci); + tmp3 = __insn_dblalign(tmp3, tmp4, srci); + tmp4 = __insn_dblalign(tmp4, tmp5, srci); + tmp5 = __insn_dblalign(tmp5, tmp6, srci); + tmp6 = __insn_dblalign(tmp6, tmp7, srci); + tmp7 = __insn_dblalign(tmp7, tmp8, srci); + + __insn_wh64(dst8); + + ST8(dst8++, tmp0); + ST8(dst8++, tmp1); + ST8(dst8++, tmp2); + ST8(dst8++, tmp3); + ST8(dst8++, tmp4); + ST8(dst8++, tmp5); + ST8(dst8++, tmp6); + ST8(dst8++, tmp7); + + tmp0 = tmp8; + } + src8--; + } + + /* Copy the rest 8-byte chunks. */ + if (n >= sizeof(op_t)) { + tmp0 = LD8(src8++); + for (; n >= sizeof(op_t); n -= sizeof(op_t)) { + tmp1 = LD8(src8++); + tmp0 = __insn_dblalign(tmp0, tmp1, srci); + ST8(dst8++, tmp0); + tmp0 = tmp1; + } + src8--; + } + + if (n == 0) + return RETVAL; + + tmp0 = LD8(src8++); + tmp1 = ((const char *)src8 <= src1_end) + ? LD8((op_t *)src8) : 0; + final = __insn_dblalign(tmp0, tmp1, srci); + + } else { + /* Aligned copy. */ + + const op_t *__restrict src8 = (const op_t *)src1; + + /* src8 and dst8 are both word-aligned. */ + if (n >= CHIP_L2_LINE_SIZE()) { + /* Copy until 'dst' is cache-line-aligned. 
*/ + for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1); + n -= sizeof(op_t)) + ST8(dst8++, LD8(src8++)); + + for (; n >= CHIP_L2_LINE_SIZE(); ) { + op_t tmp0, tmp1, tmp2, tmp3; + op_t tmp4, tmp5, tmp6, tmp7; + + /* + * Prefetch and advance to next line + * to prefetch, but don't go past the + * end. + */ + __insn_prefetch(prefetch); + + /* Make sure prefetch got scheduled + earlier. */ + __asm__ ("" : : : "memory"); + + prefetch += CHIP_L2_LINE_SIZE(); + prefetch = (prefetch < src1_end) ? prefetch : + (const char *)src8; + + /* + * Do all the loads before wh64. This + * is necessary if [src8, src8+7] and + * [dst8, dst8+7] share the same cache + * line and dst8 <= src8, as can be + * the case when called from memmove, + * or with code tested on x86 whose + * memcpy always works with forward + * copies. + */ + tmp0 = LD8(src8++); + tmp1 = LD8(src8++); + tmp2 = LD8(src8++); + tmp3 = LD8(src8++); + tmp4 = LD8(src8++); + tmp5 = LD8(src8++); + tmp6 = LD8(src8++); + tmp7 = LD8(src8++); + + /* wh64 and wait for tmp7 load completion. */ + __asm__ ("move %0, %0; wh64 %1\n" + : : "r"(tmp7), "r"(dst8)); + + ST8(dst8++, tmp0); + ST8(dst8++, tmp1); + ST8(dst8++, tmp2); + ST8(dst8++, tmp3); + ST8(dst8++, tmp4); + ST8(dst8++, tmp5); + ST8(dst8++, tmp6); + ST8(dst8++, tmp7); + + n -= CHIP_L2_LINE_SIZE(); + } +#if CHIP_L2_LINE_SIZE() != 64 +# error "Fix code that assumes particular L2 cache line size." +#endif + } + + for (; n >= sizeof(op_t); n -= sizeof(op_t)) + ST8(dst8++, LD8(src8++)); + + if (__builtin_expect(n == 0, 1)) + return RETVAL; + + final = LD8(src8); + } + + /* n != 0 if we get here. Write out any trailing bytes. 
*/ + dst1 = (char *)dst8; +#ifndef __BIG_ENDIAN__ + if (n & 4) { + ST4((uint32_t *)dst1, final); + dst1 += 4; + final >>= 32; + n &= 3; + } + if (n & 2) { + ST2((uint16_t *)dst1, final); + dst1 += 2; + final >>= 16; + n &= 1; + } + if (n) + ST1((uint8_t *)dst1, final); +#else + if (n & 4) { + ST4((uint32_t *)dst1, final >> 32); + dst1 += 4; + } + else + { + final >>= 32; + } + if (n & 2) { + ST2((uint16_t *)dst1, final >> 16); + dst1 += 2; + } + else + { + final >>= 16; + } + if (n & 1) + ST1((uint8_t *)dst1, final >> 8); +#endif + + return RETVAL; +} + +#ifdef USERCOPY_FUNC +#undef ST1 +#undef ST2 +#undef ST4 +#undef ST8 +#undef LD1 +#undef LD2 +#undef LD4 +#undef LD8 +#undef USERCOPY_FUNC +#endif diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c deleted file mode 100644 index b2fe15e0107..00000000000 --- a/arch/tile/lib/memcpy_tile64.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- */ - -#include <linux/string.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/uaccess.h> -#include <asm/fixmap.h> -#include <asm/kmap_types.h> -#include <asm/tlbflush.h> -#include <hv/hypervisor.h> -#include <arch/chip.h> - - -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() - -/* Defined in memcpy.S */ -extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n); -extern unsigned long __copy_to_user_inatomic_asm( - void __user *to, const void *from, unsigned long n); -extern unsigned long __copy_from_user_inatomic_asm( - void *to, const void __user *from, unsigned long n); -extern unsigned long __copy_from_user_zeroing_asm( - void *to, const void __user *from, unsigned long n); - -typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); - -/* Size above which to consider TLB games for performance */ -#define LARGE_COPY_CUTOFF 2048 - -/* Communicate to the simulator what we are trying to do. */ -#define sim_allow_multiple_caching(b) \ - __insn_mtspr(SPR_SIM_CONTROL, \ - SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS)) - -/* - * Copy memory by briefly enabling incoherent cacheline-at-a-time mode. - * - * We set up our own source and destination PTEs that we fully control. - * This is the only way to guarantee that we don't race with another - * thread that is modifying the PTE; we can't afford to try the - * copy_{to,from}_user() technique of catching the interrupt, since - * we must run with interrupts disabled to avoid the risk of some - * other code seeing the incoherent data in our cache. (Recall that - * our cache is indexed by PA, so even if the other code doesn't use - * our kmap_atomic virtual addresses, they'll still hit in cache using - * the normal VAs that aren't supposed to hit in cache.) 
- */ -static void memcpy_multicache(void *dest, const void *source, - pte_t dst_pte, pte_t src_pte, int len) -{ - int idx; - unsigned long flags, newsrc, newdst; - pmd_t *pmdp; - pte_t *ptep; - int type0, type1; - int cpu = get_cpu(); - - /* - * Disable interrupts so that we don't recurse into memcpy() - * in an interrupt handler, nor accidentally reference - * the PA of the source from an interrupt routine. Also - * notify the simulator that we're playing games so we don't - * generate spurious coherency warnings. - */ - local_irq_save(flags); - sim_allow_multiple_caching(1); - - /* Set up the new dest mapping */ - type0 = kmap_atomic_idx_push(); - idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; - newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); - pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); - ptep = pte_offset_kernel(pmdp, newdst); - if (pte_val(*ptep) != pte_val(dst_pte)) { - set_pte(ptep, dst_pte); - local_flush_tlb_page(NULL, newdst, PAGE_SIZE); - } - - /* Set up the new source mapping */ - type1 = kmap_atomic_idx_push(); - idx += (type0 - type1); - src_pte = hv_pte_set_nc(src_pte); - src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ - newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); - pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); - ptep = pte_offset_kernel(pmdp, newsrc); - __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ - local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); - - /* Actually move the data. */ - __memcpy_asm((void *)newdst, (const void *)newsrc, len); - - /* - * Remap the source as locally-cached and not OLOC'ed so that - * we can inval without also invaling the remote cpu's cache. - * This also avoids known errata with inv'ing cacheable oloc data. 
- */ - src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); - src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ - __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ - local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); - - /* - * Do the actual invalidation, covering the full L2 cache line - * at the end since __memcpy_asm() is somewhat aggressive. - */ - __inv_buffer((void *)newsrc, len); - - /* - * We're done: notify the simulator that all is back to normal, - * and re-enable interrupts and pre-emption. - */ - kmap_atomic_idx_pop(); - kmap_atomic_idx_pop(); - sim_allow_multiple_caching(0); - local_irq_restore(flags); - put_cpu(); -} - -/* - * Identify large copies from remotely-cached memory, and copy them - * via memcpy_multicache() if they look good, otherwise fall back - * to the particular kind of copying passed as the memcpy_t function. - */ -static unsigned long fast_copy(void *dest, const void *source, int len, - memcpy_t func) -{ - /* - * Check if it's big enough to bother with. We may end up doing a - * small copy via TLB manipulation if we're near a page boundary, - * but presumably we'll make it up when we hit the second page. - */ - while (len >= LARGE_COPY_CUTOFF) { - int copy_size, bytes_left_on_page; - pte_t *src_ptep, *dst_ptep; - pte_t src_pte, dst_pte; - struct page *src_page, *dst_page; - - /* Is the source page oloc'ed to a remote cpu? 
*/ -retry_source: - src_ptep = virt_to_pte(current->mm, (unsigned long)source); - if (src_ptep == NULL) - break; - src_pte = *src_ptep; - if (!hv_pte_get_present(src_pte) || - !hv_pte_get_readable(src_pte) || - hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3) - break; - if (get_remote_cache_cpu(src_pte) == smp_processor_id()) - break; - src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); - get_page(src_page); - if (pte_val(src_pte) != pte_val(*src_ptep)) { - put_page(src_page); - goto retry_source; - } - if (pte_huge(src_pte)) { - /* Adjust the PTE to correspond to a small page */ - int pfn = hv_pte_get_pfn(src_pte); - pfn += (((unsigned long)source & (HPAGE_SIZE-1)) - >> PAGE_SHIFT); - src_pte = pfn_pte(pfn, src_pte); - src_pte = pte_mksmall(src_pte); - } - - /* Is the destination page writable? */ -retry_dest: - dst_ptep = virt_to_pte(current->mm, (unsigned long)dest); - if (dst_ptep == NULL) { - put_page(src_page); - break; - } - dst_pte = *dst_ptep; - if (!hv_pte_get_present(dst_pte) || - !hv_pte_get_writable(dst_pte)) { - put_page(src_page); - break; - } - dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); - if (dst_page == src_page) { - /* - * Source and dest are on the same page; this - * potentially exposes us to incoherence if any - * part of src and dest overlap on a cache line. - * Just give up rather than trying to be precise. 
- */ - put_page(src_page); - break; - } - get_page(dst_page); - if (pte_val(dst_pte) != pte_val(*dst_ptep)) { - put_page(dst_page); - goto retry_dest; - } - if (pte_huge(dst_pte)) { - /* Adjust the PTE to correspond to a small page */ - int pfn = hv_pte_get_pfn(dst_pte); - pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) - >> PAGE_SHIFT); - dst_pte = pfn_pte(pfn, dst_pte); - dst_pte = pte_mksmall(dst_pte); - } - - /* All looks good: create a cachable PTE and copy from it */ - copy_size = len; - bytes_left_on_page = - PAGE_SIZE - (((int)source) & (PAGE_SIZE-1)); - if (copy_size > bytes_left_on_page) - copy_size = bytes_left_on_page; - bytes_left_on_page = - PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1)); - if (copy_size > bytes_left_on_page) - copy_size = bytes_left_on_page; - memcpy_multicache(dest, source, dst_pte, src_pte, copy_size); - - /* Release the pages */ - put_page(dst_page); - put_page(src_page); - - /* Continue on the next page */ - dest += copy_size; - source += copy_size; - len -= copy_size; - } - - return func(dest, source, len); -} - -void *memcpy(void *to, const void *from, __kernel_size_t n) -{ - if (n < LARGE_COPY_CUTOFF) - return (void *)__memcpy_asm(to, from, n); - else - return (void *)fast_copy(to, from, n, __memcpy_asm); -} - -unsigned long __copy_to_user_inatomic(void __user *to, const void *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_to_user_inatomic_asm(to, from, n); - else - return fast_copy(to, from, n, __copy_to_user_inatomic_asm); -} - -unsigned long __copy_from_user_inatomic(void *to, const void __user *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_from_user_inatomic_asm(to, from, n); - else - return fast_copy(to, from, n, __copy_from_user_inatomic_asm); -} - -unsigned long __copy_from_user_zeroing(void *to, const void __user *from, - unsigned long n) -{ - if (n < LARGE_COPY_CUTOFF) - return __copy_from_user_zeroing_asm(to, from, n); - else - return fast_copy(to, from, n, 
__copy_from_user_zeroing_asm); -} - -#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */ diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c new file mode 100644 index 00000000000..88c7016492c --- /dev/null +++ b/arch/tile/lib/memcpy_user_64.c @@ -0,0 +1,94 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Do memcpy(), but trap and return "n" when a load or store faults. + * + * Note: this idiom only works when memcpy() compiles to a leaf function. + * Here "leaf function" means not only that it makes no calls, but also + * that it needs no stack operations (sp, stack frame pointer) and makes + * no use of callee-saved registers; otherwise "jrp lr" would be incorrect, + * since unwinding the stack frame is bypassed. memcpy() is simple enough + * that these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution, but it is the best + * one so far. + * + * Also note that we are capturing "n" from the containing scope here: + * the fault fixup emitted by _ST() and _LD() moves "n" into r0 and then + * returns with "jrp lr". 
+ */ + +#define _ST(p, inst, v) \ + ({ \ + asm("1: " #inst " %0, %1;" \ + ".pushsection .coldtext.memcpy,\"ax\";" \ + "2: { move r0, %2; jrp lr };" \ + ".section __ex_table,\"a\";" \ + ".align 8;" \ + ".quad 1b, 2b;" \ + ".popsection" \ + : "=m" (*(p)) : "r" (v), "r" (n)); \ + }) + +#define _LD(p, inst) \ + ({ \ + unsigned long __v; \ + asm("1: " #inst " %0, %1;" \ + ".pushsection .coldtext.memcpy,\"ax\";" \ + "2: { move r0, %2; jrp lr };" \ + ".section __ex_table,\"a\";" \ + ".align 8;" \ + ".quad 1b, 2b;" \ + ".popsection" \ + : "=r" (__v) : "m" (*(p)), "r" (n)); \ + __v; \ + }) + +#define USERCOPY_FUNC __copy_to_user_inatomic +#define ST1(p, v) _ST((p), st1, (v)) +#define ST2(p, v) _ST((p), st2, (v)) +#define ST4(p, v) _ST((p), st4, (v)) +#define ST8(p, v) _ST((p), st, (v)) +#define LD1 LD +#define LD2 LD +#define LD4 LD +#define LD8 LD +#include "memcpy_64.c" + +#define USERCOPY_FUNC __copy_from_user_inatomic +#define ST1 ST +#define ST2 ST +#define ST4 ST +#define ST8 ST +#define LD1(p) _LD((p), ld1u) +#define LD2(p) _LD((p), ld2u) +#define LD4(p) _LD((p), ld4u) +#define LD8(p) _LD((p), ld) +#include "memcpy_64.c" + +#define USERCOPY_FUNC __copy_in_user_inatomic +#define ST1(p, v) _ST((p), st1, (v)) +#define ST2(p, v) _ST((p), st2, (v)) +#define ST4(p, v) _ST((p), st4, (v)) +#define ST8(p, v) _ST((p), st, (v)) +#define LD1(p) _LD((p), ld1u) +#define LD2(p) _LD((p), ld2u) +#define LD4(p) _LD((p), ld4u) +#define LD8(p) _LD((p), ld) +#include "memcpy_64.c" + +/* Copy as __copy_from_user_inatomic() does; if that returns a nonzero count rc, zero the final rc bytes of the n-byte destination, then return rc. */ unsigned long __copy_from_user_zeroing(void *to, const void __user *from, + unsigned long n) +{ + unsigned long rc = __copy_from_user_inatomic(to, from, n); + if (unlikely(rc)) + memset(to + n - rc, 0, rc); + return rc; +} diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index 57dbb3a5bff..2042bfe6595 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c @@ -12,13 +12,10 @@ * more details. 
*/ -#include <arch/chip.h> - #include <linux/types.h> #include <linux/string.h> #include <linux/module.h> - -#undef memset +#include <arch/chip.h> void *memset(void *s, int c, size_t n) { @@ -26,11 +23,7 @@ void *memset(void *s, int c, size_t n) int n32; uint32_t v16, v32; uint8_t *out8 = s; -#if !CHIP_HAS_WH64() - int ahead32; -#else int to_align32; -#endif /* Experimentation shows that a trivial tight loop is a win up until * around a size of 20, where writing a word at a time starts to win. @@ -61,21 +54,6 @@ void *memset(void *s, int c, size_t n) return s; } -#if !CHIP_HAS_WH64() - /* Use a spare issue slot to start prefetching the first cache - * line early. This instruction is free as the store can be buried - * in otherwise idle issue slots doing ALU ops. - */ - __insn_prefetch(out8); - - /* We prefetch the end so that a short memset that spans two cache - * lines gets some prefetching benefit. Again we believe this is free - * to issue. - */ - __insn_prefetch(&out8[n - 1]); -#endif /* !CHIP_HAS_WH64() */ - - /* Align 'out8'. We know n >= 3 so this won't write past the end. */ while (((uintptr_t) out8 & 3) != 0) { *out8++ = c; @@ -96,90 +74,6 @@ void *memset(void *s, int c, size_t n) /* This must be at least 8 or the following loop doesn't work. */ #define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) -#if !CHIP_HAS_WH64() - - ahead32 = CACHE_LINE_SIZE_IN_WORDS; - - /* We already prefetched the first and last cache lines, so - * we only need to do more prefetching if we are storing - * to more than two cache lines. - */ - if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) { - int i; - - /* Prefetch the next several cache lines. - * This is the setup code for the software-pipelined - * loop below. 
- */ -#define MAX_PREFETCH 5 - ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS; - if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS) - ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS; - - for (i = CACHE_LINE_SIZE_IN_WORDS; - i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS) - __insn_prefetch(&out32[i]); - } - - if (n32 > ahead32) { - while (1) { - int j; - - /* Prefetch by reading one word several cache lines - * ahead. Since loads are non-blocking this will - * cause the full cache line to be read while we are - * finishing earlier cache lines. Using a store - * here causes microarchitectural performance - * problems where a victimizing store miss goes to - * the head of the retry FIFO and locks the pipe for - * a few cycles. So a few subsequent stores in this - * loop go into the retry FIFO, and then later - * stores see other stores to the same cache line - * are already in the retry FIFO and themselves go - * into the retry FIFO, filling it up and grinding - * to a halt waiting for the original miss to be - * satisfied. - */ - __insn_prefetch(&out32[ahead32]); - -#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 -#error "Unhandled CACHE_LINE_SIZE_IN_WORDS" -#endif - - n32 -= CACHE_LINE_SIZE_IN_WORDS; - - /* Save icache space by only partially unrolling - * this loop. - */ - for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) { - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - } - - /* To save compiled code size, reuse this loop even - * when we run out of prefetching to do by dropping - * ahead32 down. - */ - if (n32 <= ahead32) { - /* Not even a full cache line left, - * so stop now. - */ - if (n32 < CACHE_LINE_SIZE_IN_WORDS) - break; - - /* Choose a small enough value that we don't - * prefetch past the end. There's no sense - * in touching cache lines we don't have to. 
- */ - ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1; - } - } - } - -#else /* CHIP_HAS_WH64() */ - /* Determine how many words we need to emit before the 'out32' * pointer becomes aligned modulo the cache line size. */ @@ -236,8 +130,6 @@ void *memset(void *s, int c, size_t n) n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; } -#endif /* CHIP_HAS_WH64() */ - /* Now handle any leftover values. */ if (n32 != 0) { do { diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c new file mode 100644 index 00000000000..03ef69cd73d --- /dev/null +++ b/arch/tile/lib/memset_64.c @@ -0,0 +1,142 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include <arch/chip.h> +#include "string-endian.h" + +/* Store the byte value c into each of the n bytes starting at s, and return s. */ void *memset(void *s, int c, size_t n) +{ + uint64_t *out64; + int n64, to_align64; + uint64_t v64; + uint8_t *out8 = s; + + /* Experimentation shows that a trivial tight loop is a win up until + * around a size of 20, where writing a word at a time starts to win. + */ +#define BYTE_CUTOFF 20 + +#if BYTE_CUTOFF < 7 + /* BYTE_CUTOFF must be at least this big, or some code later + * on doesn't work. + */ +#error "BYTE_CUTOFF is too small" +#endif + + if (n < BYTE_CUTOFF) { + /* Strangely, this turns out to be the tightest way to + * write this loop. + */ + if (n != 0) { + do { + /* Strangely, combining these into one line + * performs worse. 
+ */ + *out8 = c; + out8++; + } while (--n != 0); + } + + return s; + } + + /* Align 'out8'. We know n >= 7 so this won't write past the end. */ + while (((uintptr_t) out8 & 7) != 0) { + *out8++ = c; + --n; + } + + /* Align 'n' by storing the trailing bytes from the end backwards. */ + while (n & 7) + out8[--n] = c; + + out64 = (uint64_t *) out8; + n64 = n >> 3; + + /* Tile input byte out to 64 bits. */ + v64 = copy_byte(c); + + /* This must be at least 8 or the following loop doesn't work. */ +#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8) + + /* Determine how many doublewords we need to emit before the 'out64' + * pointer becomes aligned modulo the cache line size. + */ + to_align64 = (-((uintptr_t)out64 >> 3)) & + (CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1); + + /* Only bother aligning and using wh64 if there is at least + * one full cache line to process. This check also prevents + * overrunning the end of the buffer with alignment doublewords. + */ + if (to_align64 <= n64 - CACHE_LINE_SIZE_IN_DOUBLEWORDS) { + int lines_left; + + /* Align out64 mod the cache line size so we can use wh64. */ + n64 -= to_align64; + for (; to_align64 != 0; to_align64--) { + *out64 = v64; + out64++; + } + + /* Use unsigned divide to turn this into a right shift. */ + lines_left = (unsigned)n64 / CACHE_LINE_SIZE_IN_DOUBLEWORDS; + + do { + /* Only wh64 a few lines at a time, so we don't + * exceed the maximum number of victim lines. + */ + int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS()) + ? lines_left + : CHIP_MAX_OUTSTANDING_VICTIMS()); + uint64_t *wh = out64; + int i = x; + int j; + + lines_left -= x; + + do { + __insn_wh64(wh); + wh += CACHE_LINE_SIZE_IN_DOUBLEWORDS; + } while (--i); + + for (j = x * (CACHE_LINE_SIZE_IN_DOUBLEWORDS / 4); + j != 0; j--) { + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + *out64++ = v64; + } + } while (lines_left != 0); + + /* We processed all full lines above, so only this many + * doublewords remain to be processed. 
+ */ + n64 &= CACHE_LINE_SIZE_IN_DOUBLEWORDS - 1; + } + + /* Now handle any leftover doublewords. */ + if (n64 != 0) { + do { + *out64 = v64; + out64++; + } while (--n64 != 0); + } + + return s; +} +EXPORT_SYMBOL(memset); diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index cb0999fb64b..b34f79aada4 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -101,7 +101,7 @@ EXPORT_SYMBOL(arch_spin_unlock_wait); * preserve the semantic that the same read lock can be acquired in an * interrupt context. */ -inline int arch_read_trylock(arch_rwlock_t *rwlock) +int arch_read_trylock(arch_rwlock_t *rwlock) { u32 val; __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); @@ -144,7 +144,7 @@ void arch_read_unlock(arch_rwlock_t *rwlock) for (;;) { __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); val = __insn_tns((int *)&rwlock->lock); - if (likely(val & 1) == 0) { + if (likely((val & 1) == 0)) { rwlock->lock = val - (1 << _RD_COUNT_SHIFT); __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); break; diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c new file mode 100644 index 00000000000..d6fb9581e98 --- /dev/null +++ b/arch/tile/lib/spinlock_64.c @@ -0,0 +1,104 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/processor.h> + +#include "spinlock_common.h" + +/* + * Read the spinlock value without allocating in our cache and without + * causing an invalidation to another cpu with a copy of the cacheline. + * This is important when we are spinning waiting for the lock. + * The read is issued as atomic_cmpxchg(lock, -1, -1), and the value that + * call returns is used as the current lock word. + */ +static inline u32 arch_spin_read_noalloc(void *lock) +{ + return atomic_cmpxchg((atomic_t *)lock, -1, -1); +} + +/* + * Wait until the high bits (current) match my ticket. + * If we notice the overflow bit set on entry, we clear it. + * Between polls we relax() for a time proportional to the distance + * between my ticket and the current one. + */ +void arch_spin_lock_slow(arch_spinlock_t *lock, u32 my_ticket) +{ + if (unlikely(my_ticket & __ARCH_SPIN_NEXT_OVERFLOW)) { + __insn_fetchand4(&lock->lock, ~__ARCH_SPIN_NEXT_OVERFLOW); + my_ticket &= ~__ARCH_SPIN_NEXT_OVERFLOW; + } + + for (;;) { + u32 val = arch_spin_read_noalloc(lock); + u32 delta = my_ticket - arch_spin_current(val); + if (delta == 0) + return; + relax((128 / CYCLES_PER_RELAX_LOOP) * delta); + } +} +EXPORT_SYMBOL(arch_spin_lock_slow); + +/* + * Check the lock to see if it is plausible, and try to get it with cmpxchg(). + * Returns zero if current != next or if the cmpxchg() does not see the + * value we read; returns nonzero if the cmpxchg() succeeded. + */ +int arch_spin_trylock(arch_spinlock_t *lock) +{ + u32 val = arch_spin_read_noalloc(lock); + if (unlikely(arch_spin_current(val) != arch_spin_next(val))) + return 0; + return cmpxchg(&lock->lock, val, (val + 1) & ~__ARCH_SPIN_NEXT_OVERFLOW) + == val; +} +EXPORT_SYMBOL(arch_spin_trylock); + +/* Poll arch_spin_is_locked(), calling delay_backoff() with an increasing iteration count, until the lock is no longer held. 
*/ void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u32 iterations = 0; + while (arch_spin_is_locked(lock)) + delay_backoff(iterations++); +} +EXPORT_SYMBOL(arch_spin_unlock_wait); + +/* + * If the read lock fails due to a writer, we retry periodically + * until the value is positive and we write our incremented reader count. 
+ */ +void __read_lock_failed(arch_rwlock_t *rw) +{ + u32 val; + int iterations = 0; + do { + delay_backoff(iterations++); + val = __insn_fetchaddgez4(&rw->lock, 1); + } while (unlikely(arch_write_val_locked(val))); +} +EXPORT_SYMBOL(__read_lock_failed); + +/* + * If we failed because there were readers, clear the "writer" bit + * so we don't block additional readers. Otherwise, there was another + * writer anyway, so our "fetchor" made no difference. Then wait, + * issuing periodic fetchor instructions, till we get the lock. + * The loop ends once a fetchor returns zero. + */ +void __write_lock_failed(arch_rwlock_t *rw, u32 val) +{ + int iterations = 0; + do { + if (!arch_write_val_locked(val)) + val = __insn_fetchand4(&rw->lock, ~__WRITE_LOCK_BIT); + delay_backoff(iterations++); + val = __insn_fetchor4(&rw->lock, __WRITE_LOCK_BIT); + } while (val != 0); +} +EXPORT_SYMBOL(__write_lock_failed); diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c1010980913..6ac37509fac 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h @@ -60,5 +60,5 @@ static void delay_backoff(int iterations) loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & (loops - 1); - relax(1 << exponent); + relax(loops); } diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c index c94e6f7ae7b..841fe696301 100644 --- a/arch/tile/lib/strchr_32.c +++ b/arch/tile/lib/strchr_32.c @@ -16,8 +16,6 @@ #include <linux/string.h> #include <linux/module.h> -#undef strchr - char *strchr(const char *s, int c) { int z, g; diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c new file mode 100644 index 00000000000..fe6e31c06f8 --- /dev/null +++ b/arch/tile/lib/strchr_64.c @@ -0,0 +1,62 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include "string-endian.h" + +/* Return a pointer to the first byte of s equal to c, or NULL if the terminating '\0' comes first; s is examined one aligned 8-byte word at a time. */ char *strchr(const char *s, int c) +{ + int z, g; + + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *)(s_int & -8); + + /* Create eight copies of the byte for which we are looking. */ + const uint64_t goal = copy_byte(c); + + /* Read the first aligned word, but force bytes before the string to + * match neither zero nor goal (we make sure the high bit of each + * byte is 1, and the low 7 bits are all the opposite of the goal + * byte). + */ + const uint64_t before_mask = MASK(s_int); + uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1)); + + uint64_t zero_matches, goal_matches; + while (1) { + /* Look for a terminating '\0'. */ + zero_matches = __insn_v1cmpeqi(v, 0); + + /* Look for the goal byte. */ + goal_matches = __insn_v1cmpeq(v, goal); + + if (__builtin_expect((zero_matches | goal_matches) != 0, 0)) + break; + + v = *++p; + } + + /* Bit offsets, within the word at p, of the first '\0' and first goal match. */ z = CFZ(zero_matches); + g = CFZ(goal_matches); + + /* If we found c before '\0' we got a match. Note that if c == '\0' + * then g == z, and we correctly return the address of the '\0' + * rather than NULL. + */ + return (g <= z) ? 
((char *)p) + (g >> 3) : NULL; +} +EXPORT_SYMBOL(strchr); diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h new file mode 100644 index 00000000000..2e49cbfe937 --- /dev/null +++ b/arch/tile/lib/string-endian.h @@ -0,0 +1,44 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Provide a mask based on the pointer alignment that + * sets up non-zero bytes before the beginning of the string. + * The MASK expression works because shift counts are taken mod 64. + * Also, specify how to count "first" and "last" bits + * when the bits have been read as a word. + */ + +#include <asm/byteorder.h> + +/* NOTE(review): MASK() and NULMASK() expand their argument without parentheses, so pass only a simple identifier or constant. CFZ() is ctz on little-endian and clz on big-endian; REVCZ() is the opposite pairing. */ #ifdef __LITTLE_ENDIAN +#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1) +#define NULMASK(x) ((2ULL << x) - 1) +#define CFZ(x) __insn_ctz(x) +#define REVCZ(x) __insn_clz(x) +#else +#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1))) +#define NULMASK(x) (-2LL << (63 - x)) +#define CFZ(x) __insn_clz(x) +#define REVCZ(x) __insn_ctz(x) +#endif + +/* + * Create eight copies of the byte in a uint64_t. Byte Shuffle uses + * the bytes of srcB as the index into the dest vector to select a + * byte. With all indices of zero, the first byte is copied into all + * the other bytes. + */ +static inline uint64_t copy_byte(uint8_t byte) +{ + return __insn_shufflebytes(byte, 0, 0); +} diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c index 4974292a553..f26f88e11e4 100644 --- a/arch/tile/lib/strlen_32.c +++ b/arch/tile/lib/strlen_32.c @@ -16,8 +16,6 @@ #include <linux/string.h> #include <linux/module.h> -#undef strlen - size_t strlen(const char *s) { /* Get an aligned pointer. 
*/ diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c new file mode 100644 index 00000000000..9583fc3361f --- /dev/null +++ b/arch/tile/lib/strlen_64.c @@ -0,0 +1,35 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include "string-endian.h" + +/* Return the number of bytes in s before its terminating zero byte, scanning one aligned 8-byte word at a time. */ size_t strlen(const char *s) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *)(s_int & -8); + + /* Read the first aligned word and OR in MASK(s_int), so the bytes + * that precede the start of the string cannot compare equal to zero. + */ + uint64_t v = *p | MASK(s_int); + + uint64_t bits; + /* Advance a word at a time until some byte compares equal to zero. */ while ((bits = __insn_v1cmpeqi(v, 0)) == 0) + v = *++p; + + /* CFZ(bits) >> 3 is the byte index of the first zero byte within the word at p. 
*/ return ((const char *)p) + (CFZ(bits) >> 3) - s; +} +EXPORT_SYMBOL(strlen); diff --git a/arch/tile/lib/strnlen_32.c b/arch/tile/lib/strnlen_32.c new file mode 100644 index 00000000000..1434141d9e0 --- /dev/null +++ b/arch/tile/lib/strnlen_32.c @@ -0,0 +1,47 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> + +size_t strnlen(const char *s, size_t count) +{ + /* Get an aligned pointer. */ + const uintptr_t s_int = (uintptr_t) s; + const uint32_t *p = (const uint32_t *)(s_int & -4); + size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1)); + size_t len; + uint32_t v, bits; + + /* Avoid page fault risk by not reading any bytes when count is 0. */ + if (count == 0) + return 0; + + /* Read first word, but force bytes before the string to be nonzero. */ + v = *p | ((1 << ((s_int << 3) & 31)) - 1); + + while ((bits = __insn_seqb(v, 0)) == 0) { + if (bytes_read >= count) { + /* Read COUNT bytes and didn't find the terminator. */ + return count; + } + v = *++p; + bytes_read += sizeof(v); + } + + len = ((const char *) p) + (__insn_ctz(bits) >> 3) - s; + return (len < count ? len : count); +} +EXPORT_SYMBOL(strnlen); diff --git a/arch/tile/lib/strnlen_64.c b/arch/tile/lib/strnlen_64.c new file mode 100644 index 00000000000..2e8de6a5136 --- /dev/null +++ b/arch/tile/lib/strnlen_64.c @@ -0,0 +1,48 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/module.h> +#include "string-endian.h" + +size_t strnlen(const char *s, size_t count) +{ + /* Get an aligned pointer. 
*/ + const uintptr_t s_int = (uintptr_t) s; + const uint64_t *p = (const uint64_t *)(s_int & -8); + size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1)); + size_t len; + uint64_t v, bits; + + /* Avoid page fault risk by not reading any bytes when count is 0. */ + if (count == 0) + return 0; + + /* Read and MASK the first word. */ + v = *p | MASK(s_int); + + while ((bits = __insn_v1cmpeqi(v, 0)) == 0) { + if (bytes_read >= count) { + /* Read COUNT bytes and didn't find the terminator. */ + return count; + } + v = *++p; + bytes_read += sizeof(v); + } + + len = ((const char *) p) + (CFZ(bits) >> 3) - s; + return (len < count ? len : count); +} +EXPORT_SYMBOL(strnlen); diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c index f8d398c9ee7..030abe3ee4f 100644 --- a/arch/tile/lib/uaccess.c +++ b/arch/tile/lib/uaccess.c @@ -22,11 +22,3 @@ int __range_ok(unsigned long addr, unsigned long size) is_arch_mappable_range(addr, size)); } EXPORT_SYMBOL(__range_ok); - -#ifdef CONFIG_DEBUG_COPY_FROM_USER -void copy_from_user_overflow(void) -{ - WARN(1, "Buffer overflow detected!\n"); -} -EXPORT_SYMBOL(copy_from_user_overflow); -#endif diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S index 979f76d8374..1bc16222463 100644 --- a/arch/tile/lib/usercopy_32.S +++ b/arch/tile/lib/usercopy_32.S @@ -19,82 +19,6 @@ /* Access user memory, but use MMU to avoid propagating kernel exceptions. */ - .pushsection .fixup,"ax" - -get_user_fault: - { move r0, zero; move r1, zero } - { movei r2, -EFAULT; jrp lr } - ENDPROC(get_user_fault) - -put_user_fault: - { movei r0, -EFAULT; jrp lr } - ENDPROC(put_user_fault) - - .popsection - -/* - * __get_user_N functions take a pointer in r0, and return 0 in r2 - * on success, with the value in r0; or else -EFAULT in r2. 
- */ -#define __get_user_N(bytes, LOAD) \ - STD_ENTRY(__get_user_##bytes); \ -1: { LOAD r0, r0; move r1, zero; move r2, zero }; \ - jrp lr; \ - STD_ENDPROC(__get_user_##bytes); \ - .pushsection __ex_table,"a"; \ - .word 1b, get_user_fault; \ - .popsection - -__get_user_N(1, lb_u) -__get_user_N(2, lh_u) -__get_user_N(4, lw) - -/* - * __get_user_8 takes a pointer in r0, and returns 0 in r2 - * on success, with the value in r0/r1; or else -EFAULT in r2. - */ - STD_ENTRY(__get_user_8); -1: { lw r0, r0; addi r1, r0, 4 }; -2: { lw r1, r1; move r2, zero }; - jrp lr; - STD_ENDPROC(__get_user_8); - .pushsection __ex_table,"a"; - .word 1b, get_user_fault; - .word 2b, get_user_fault; - .popsection - -/* - * __put_user_N functions take a value in r0 and a pointer in r1, - * and return 0 in r0 on success or -EFAULT on failure. - */ -#define __put_user_N(bytes, STORE) \ - STD_ENTRY(__put_user_##bytes); \ -1: { STORE r1, r0; move r0, zero }; \ - jrp lr; \ - STD_ENDPROC(__put_user_##bytes); \ - .pushsection __ex_table,"a"; \ - .word 1b, put_user_fault; \ - .popsection - -__put_user_N(1, sb) -__put_user_N(2, sh) -__put_user_N(4, sw) - -/* - * __put_user_8 takes a value in r0/r1 and a pointer in r2, - * and returns 0 in r0 on success or -EFAULT on failure. - */ -STD_ENTRY(__put_user_8) -1: { sw r2, r0; addi r2, r2, 4 } -2: { sw r2, r1; move r0, zero } - jrp lr - STD_ENDPROC(__put_user_8) - .pushsection __ex_table,"a" - .word 1b, put_user_fault - .word 2b, put_user_fault - .popsection - - /* * strnlen_user_asm takes the pointer in r0, and the length bound in r1. * It returns the length, including the terminating NUL, or zero on exception. 
@@ -112,6 +36,7 @@ strnlen_user_fault: { move r0, zero; jrp lr } ENDPROC(strnlen_user_fault) .section __ex_table,"a" + .align 4 .word 1b, strnlen_user_fault .popsection @@ -123,18 +48,20 @@ strnlen_user_fault: */ STD_ENTRY(strncpy_from_user_asm) { bz r2, 2f; move r3, r0 } -1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } +1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } { sb r0, r4; addi r0, r0, 1 } - bz r2, 2f - bnzt r4, 1b - addi r0, r0, -1 /* don't count the trailing NUL */ -2: { sub r0, r0, r3; jrp lr } + bz r4, 2f + bnzt r2, 1b + { sub r0, r0, r3; jrp lr } +2: addi r0, r0, -1 /* don't count the trailing NUL */ + { sub r0, r0, r3; jrp lr } STD_ENDPROC(strncpy_from_user_asm) .pushsection .fixup,"ax" strncpy_from_user_fault: { movei r0, -EFAULT; jrp lr } ENDPROC(strncpy_from_user_fault) .section __ex_table,"a" + .align 4 .word 1b, strncpy_from_user_fault .popsection @@ -153,6 +80,7 @@ STD_ENTRY(clear_user_asm) bnzt r1, 1b 2: { move r0, r1; jrp lr } .pushsection __ex_table,"a" + .align 4 .word 1b, 2b .popsection @@ -162,6 +90,7 @@ STD_ENTRY(clear_user_asm) 2: { move r0, r1; jrp lr } STD_ENDPROC(clear_user_asm) .pushsection __ex_table,"a" + .align 4 .word 1b, 2b .popsection @@ -181,25 +110,7 @@ STD_ENTRY(flush_user_asm) 2: { move r0, r1; jrp lr } STD_ENDPROC(flush_user_asm) .pushsection __ex_table,"a" - .word 1b, 2b - .popsection - -/* - * inv_user_asm takes the user target address in r0 and the - * number of bytes to invalidate in r1. - * It returns the number of not inv'able bytes (hopefully zero) in r0. 
- */ -STD_ENTRY(inv_user_asm) - bz r1, 2f - { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } - { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } - { and r0, r0, r2; and r1, r1, r2 } - { sub r1, r1, r0 } -1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() } - { addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b } -2: { move r0, r1; jrp lr } - STD_ENDPROC(inv_user_asm) - .pushsection __ex_table,"a" + .align 4 .word 1b, 2b .popsection @@ -219,5 +130,6 @@ STD_ENTRY(finv_user_asm) 2: { move r0, r1; jrp lr } STD_ENDPROC(finv_user_asm) .pushsection __ex_table,"a" + .align 4 .word 1b, 2b .popsection diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S new file mode 100644 index 00000000000..b3b31a3306f --- /dev/null +++ b/arch/tile/lib/usercopy_64.S @@ -0,0 +1,135 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/cache.h> +#include <arch/chip.h> + +/* Access user memory, but use MMU to avoid propagating kernel exceptions. */ + +/* + * strnlen_user_asm takes the pointer in r0, and the length bound in r1. + * It returns the length, including the terminating NUL, or zero on exception. + * If length is greater than the bound, returns one plus the bound. 
+ */ +STD_ENTRY(strnlen_user_asm) + { beqz r1, 2f; addi r3, r0, -1 } /* bias down to include NUL */ +1: { ld1u r4, r0; addi r1, r1, -1 } + beqz r4, 2f + { bnezt r1, 1b; addi r0, r0, 1 } +2: { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strnlen_user_asm) + .pushsection .fixup,"ax" +strnlen_user_fault: + { move r0, zero; jrp lr } + ENDPROC(strnlen_user_fault) + .section __ex_table,"a" + .align 8 + .quad 1b, strnlen_user_fault + .popsection + +/* + * strncpy_from_user_asm takes the kernel target pointer in r0, + * the userspace source pointer in r1, and the length bound (including + * the trailing NUL) in r2. On success, it returns the string length + * (not including the trailing NUL), or -EFAULT on failure. + */ +STD_ENTRY(strncpy_from_user_asm) + { beqz r2, 2f; move r3, r0 } +1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 } + { st1 r0, r4; addi r0, r0, 1 } + beqz r4, 2f + bnezt r2, 1b + { sub r0, r0, r3; jrp lr } +2: addi r0, r0, -1 /* don't count the trailing NUL */ + { sub r0, r0, r3; jrp lr } + STD_ENDPROC(strncpy_from_user_asm) + .pushsection .fixup,"ax" +strncpy_from_user_fault: + { movei r0, -EFAULT; jrp lr } + ENDPROC(strncpy_from_user_fault) + .section __ex_table,"a" + .align 8 + .quad 1b, strncpy_from_user_fault + .popsection + +/* + * clear_user_asm takes the user target address in r0 and the + * number of bytes to zero in r1. + * It returns the number of uncopiable bytes (hopefully zero) in r0. + * Note that we don't use a separate .fixup section here since we fall + * through into the "fixup" code as the last straight-line bundle anyway. 
+ */ +STD_ENTRY(clear_user_asm) + { beqz r1, 2f; or r2, r0, r1 } + andi r2, r2, 7 + beqzt r2, .Lclear_aligned_user_asm +1: { st1 r0, zero; addi r0, r0, 1; addi r1, r1, -1 } + bnezt r1, 1b +2: { move r0, r1; jrp lr } + .pushsection __ex_table,"a" + .align 8 + .quad 1b, 2b + .popsection + +.Lclear_aligned_user_asm: +1: { st r0, zero; addi r0, r0, 8; addi r1, r1, -8 } + bnezt r1, 1b +2: { move r0, r1; jrp lr } + STD_ENDPROC(clear_user_asm) + .pushsection __ex_table,"a" + .align 8 + .quad 1b, 2b + .popsection + +/* + * flush_user_asm takes the user target address in r0 and the + * number of bytes to flush in r1. + * It returns the number of unflushable bytes (hopefully zero) in r0. + */ +STD_ENTRY(flush_user_asm) + beqz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { flush r0; addi r1, r1, -CHIP_FLUSH_STRIDE() } + { addi r0, r0, CHIP_FLUSH_STRIDE(); bnezt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(flush_user_asm) + .pushsection __ex_table,"a" + .align 8 + .quad 1b, 2b + .popsection + +/* + * finv_user_asm takes the user target address in r0 and the + * number of bytes to flush-invalidate in r1. + * It returns the number of not finv'able bytes (hopefully zero) in r0. 
+ */ +STD_ENTRY(finv_user_asm) + beqz r1, 2f + { movei r2, L2_CACHE_BYTES; add r1, r0, r1 } + { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 } + { and r0, r0, r2; and r1, r1, r2 } + { sub r1, r1, r0 } +1: { finv r0; addi r1, r1, -CHIP_FINV_STRIDE() } + { addi r0, r0, CHIP_FINV_STRIDE(); bnezt r1, 1b } +2: { move r0, r1; jrp lr } + STD_ENDPROC(finv_user_asm) + .pushsection __ex_table,"a" + .align 8 + .quad 1b, 2b + .popsection diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 55e58e93bfc..23f044e8a7a 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -21,6 +21,8 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/sections.h> +#include <asm/vdso.h> +#include <arch/sim.h> /* Notify a running simulator, if any, that an exec just occurred. */ static void sim_notify_exec(const char *binary_name) @@ -35,28 +37,57 @@ static void sim_notify_exec(const char *binary_name) } while (c); } -static int notify_exec(void) +static int notify_exec(struct mm_struct *mm) { - int retval = 0; /* failure */ - struct vm_area_struct *vma = current->mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + char *buf, *path; + struct vm_area_struct *vma; + + if (!sim_is_simulator()) + return 1; + + if (mm->exe_file == NULL) + return 0; + + for (vma = current->mm->mmap; ; vma = vma->vm_next) { + if (vma == NULL) + return 0; + if (vma->vm_file == mm->exe_file) break; - vma = vma->vm_next; } - if (vma) { - char *buf = (char *) __get_free_page(GFP_KERNEL); - if (buf) { - char *path = d_path(&vma->vm_file->f_path, - buf, PAGE_SIZE); - if (!IS_ERR(path)) { - sim_notify_exec(path); - retval = 1; - } - free_page((unsigned long)buf); + + buf = (char *) __get_free_page(GFP_KERNEL); + if (buf == NULL) + return 0; + + path = d_path(&mm->exe_file->f_path, buf, PAGE_SIZE); + if (IS_ERR(path)) { + free_page((unsigned long)buf); + return 0; + } + + /* + * Notify simulator of an ET_DYN object so we know the load address. 
+ * The somewhat cryptic overuse of SIM_CONTROL_DLOPEN allows us + * to be backward-compatible with older simulator releases. + */ + if (vma->vm_start == (ELF_ET_DYN_BASE & PAGE_MASK)) { + char buf[64]; + int i; + + snprintf(buf, sizeof(buf), "0x%lx:@", vma->vm_start); + for (i = 0; ; ++i) { + char c = buf[i]; + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_DLOPEN + | (c << _SIM_CONTROL_OPERATOR_BITS))); + if (c == '\0') + break; } } - return retval; + + sim_notify_exec(path); + free_page((unsigned long)buf); + return 1; } /* Notify a running simulator, if any, that we loaded an interpreter. */ @@ -72,63 +103,23 @@ static void sim_notify_interp(unsigned long load_addr) } -/* Kernel address of page used to map read-only kernel data into userspace. */ -static void *vdso_page; - -/* One-entry array used for install_special_mapping. */ -static struct page *vdso_pages[1]; - -static int __init vdso_setup(void) -{ - vdso_page = (void *)get_zeroed_page(GFP_ATOMIC); - memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn); - vdso_pages[0] = virt_to_page(vdso_page); - return 0; -} -device_initcall(vdso_setup); - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_private_data == vdso_pages) - return "[vdso]"; -#ifndef __tilegx__ - if (vma->vm_start == MEM_USER_INTRPT) - return "[intrpt]"; -#endif - return NULL; -} - int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) { struct mm_struct *mm = current->mm; - unsigned long vdso_base; int retval = 0; + down_write(&mm->mmap_sem); + /* * Notify the simulator that an exec just occurred. * If we can't find the filename of the mapping, just use * whatever was passed as the linux_binprm filename. */ - if (!notify_exec()) + if (!notify_exec(mm)) sim_notify_exec(bprm->filename); - down_write(&mm->mmap_sem); - - /* - * MAYWRITE to allow gdb to COW and set breakpoints - * - * Make sure the vDSO gets into every core dump. 
Dumping its - * contents makes post-mortem fully interpretable later - * without matching up the same kernel and hardware config to - * see what PC values meant. - */ - vdso_base = VDSO_BASE; - retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - vdso_pages); + retval = setup_vdso_pages(); #ifndef __tilegx__ /* @@ -140,7 +131,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (!retval) { unsigned long addr = MEM_USER_INTRPT; addr = mmap_region(NULL, addr, INTRPT_SIZE, - MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); if (addr > (unsigned long) -PAGE_SIZE) diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 51f8663bf07..6c0571216a9 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -34,8 +34,8 @@ #include <linux/hugetlb.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/kdebug.h> -#include <asm/system.h> #include <asm/pgalloc.h> #include <asm/sections.h> #include <asm/traps.h> @@ -43,15 +43,18 @@ #include <arch/interrupts.h> -static noinline void force_sig_info_fault(int si_signo, int si_code, - unsigned long address, int fault_num, struct task_struct *tsk) +static noinline void force_sig_info_fault(const char *type, int si_signo, + int si_code, unsigned long address, + int fault_num, + struct task_struct *tsk, + struct pt_regs *regs) { siginfo_t info; if (unlikely(tsk->pid < 2)) { panic("Signal %d (code %d) at %#lx sent to %s!", si_signo, si_code & 0xffff, address, - tsk->pid ? "init" : "the idle task"); + is_idle_task(tsk) ? 
"the idle task" : "init"); } info.si_signo = si_signo; @@ -59,6 +62,7 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, info.si_code = si_code; info.si_addr = (void __user *)address; info.si_trapno = fault_num; + trace_unhandled_signal(type, regs, address, si_signo); force_sig_info(si_signo, &info, tsk); } @@ -67,15 +71,17 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, * Synthesize the fault a PL0 process would get by doing a word-load of * an unaligned address or a high kernel address. */ -SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address, - struct pt_regs *, regs) +SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address) { + struct pt_regs *regs = current_pt_regs(); + if (address >= PAGE_OFFSET) - force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address, - INT_DTLB_MISS, current); + force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR, + address, INT_DTLB_MISS, current, regs); else - force_sig_info_fault(SIGBUS, BUS_ADRALN, address, - INT_UNALIGN_DATA, current); + force_sig_info_fault("atomic alignment fault", SIGBUS, + BUS_ADRALN, address, + INT_UNALIGN_DATA, current, regs); /* * Adjust pc to point at the actual instruction, which is unusual @@ -117,16 +123,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else + else BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k)); return pmd_k; } /* - * Handle a fault on the vmalloc or module mapping area + * Handle a fault on the vmalloc area. 
*/ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) { @@ -144,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -183,7 +186,7 @@ static pgd_t *get_current_pgd(void) HV_Context ctx = hv_inquire_context(); unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; struct page *pgd_page = pfn_to_page(pgd_pfn); - BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */ + BUG_ON(PageHighMem(pgd_page)); return (pgd_t *) __va(ctx.page_table); } @@ -199,9 +202,14 @@ static pgd_t *get_current_pgd(void) * interrupt or a critical region, and must do as little as possible. * Similarly, we can't use atomic ops here, since we may be handling a * fault caused by an atomic op access. + * + * If we find a migrating PTE while we're in an NMI context, and we're + * at a PC that has a registered exception handler, we don't wait, + * since this thread may (e.g.) have been interrupted while migrating + * its own stack, which would then cause us to self-deadlock. 
*/ static int handle_migrating_pte(pgd_t *pgd, int fault_num, - unsigned long address, + unsigned long address, unsigned long pc, int is_kernel_mode, int write) { pud_t *pud; @@ -223,6 +231,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num, pte_offset_kernel(pmd, address); pteval = *pte; if (pte_migrating(pteval)) { + if (in_nmi() && search_exception_tables(pc)) + return 0; wait_for_migration(pte); return 1; } @@ -262,12 +272,15 @@ static int handle_page_fault(struct pt_regs *regs, int si_code; int is_kernel_mode; pgd_t *pgd; + unsigned int flags; /* on TILE, protection faults are always writes */ if (!is_page_fault) write = 1; - is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + is_kernel_mode = !user_mode(regs); tsk = validate_current(); @@ -296,7 +309,7 @@ static int handle_page_fault(struct pt_regs *regs, * rather than trying to patch up the existing PTE. */ pgd = get_current_pgd(); - if (handle_migrating_pte(pgd, fault_num, address, + if (handle_migrating_pte(pgd, fault_num, address, regs->pc, is_kernel_mode, write)) return 1; @@ -331,9 +344,12 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're trying to touch user-space addresses, we must * be either at PL0, or else with interrupts enabled in the - * kernel, so either way we can re-enable interrupts here. + * kernel, so either way we can re-enable interrupts here + * unless we are doing atomic access to user space with + * interrupts disabled. */ - local_irq_enable(); + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); mm = tsk->mm; @@ -346,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (!is_kernel_mode) + flags |= FAULT_FLAG_USER; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the @@ -368,6 +387,8 @@ static int handle_page_fault(struct pt_regs *regs, vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } + +retry: down_read(&mm->mmap_sem); } @@ -404,18 +425,22 @@ good_area: #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!is_page_fault || !(vma->vm_flags & VM_READ)) goto bad_area; } - survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -423,33 +448,33 @@ good_area: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } + } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() - /* - * If this was an asynchronous fault, - * restart the appropriate engine. - */ - switch (fault_num) { #if CHIP_HAS_TILE_DMA() + /* If this was a DMA TLB fault, restart the DMA engine. 
*/ + switch (fault_num) { case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: case INT_DMATLB_ACCESS: case INT_DMATLB_ACCESS_DWNCL: __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); break; -#endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - __insn_mtspr(SPR_SNCTL, - __insn_mfspr(SPR_SNCTL) & - ~SPR_SNCTL__FRZPROC_MASK); - break; -#endif } #endif @@ -471,8 +496,8 @@ bad_area_nosemaphore: */ local_irq_enable(); - force_sig_info_fault(SIGSEGV, si_code, address, - fault_num, tsk); + force_sig_info_fault("segfault", SIGSEGV, si_code, address, + fault_num, tsk, regs); return 0; } @@ -510,7 +535,7 @@ no_context: if (unlikely(tsk->pid < 2)) { panic("Kernel page fault running %s!", - tsk->pid ? "init" : "the idle task"); + is_idle_task(tsk) ? "the idle task" : "init"); } /* @@ -530,15 +555,10 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - pr_alert("VM: killing process %s\n", tsk->comm); - if (!is_kernel_mode) - do_group_exit(SIGKILL); - goto no_context; + if (is_kernel_mode) + goto no_context; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); @@ -547,7 +567,8 @@ do_sigbus: if (is_kernel_mode) goto no_context; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk); + force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address, + fault_num, tsk, regs); return 0; } @@ -660,7 +681,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, */ if (fault_num == INT_DTLB_ACCESS) write = 1; - if (handle_migrating_pte(pgd, fault_num, address, 1, write)) + if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) return state; /* Return zero so that we continue on with normal fault handling. */ @@ -683,8 +704,60 @@ void do_page_fault(struct pt_regs *regs, int fault_num, { int is_page_fault; +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. 
The + * exception code is redundant as it is also carried in REGS, + * but we pass it anyhow. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + regs->faultnum, SIGSEGV) == NOTIFY_STOP) + return; +#endif + +#ifdef __tilegx__ + /* + * We don't need early do_page_fault_ics() support, since unlike + * Pro we don't need to worry about unlocking the atomic locks. + * There is only one current case in GX where we touch any memory + * under ICS other than our own kernel stack, and we handle that + * here. (If we crash due to trying to touch our own stack, + * we're in too much trouble for C code to help out anyway.) + */ + if (write & ~1) { + unsigned long pc = write & ~1; + if (pc >= (unsigned long) __start_unalign_asm_code && + pc < (unsigned long) __end_unalign_asm_code) { + struct thread_info *ti = current_thread_info(); + /* + * Our EX_CONTEXT is still what it was from the + * initial unalign exception, but now we've faulted + * on the JIT page. We would like to complete the + * page fault however is appropriate, and then retry + * the instruction that caused the unalign exception. + * Our state has been "corrupted" by setting the low + * bit in "sp", and stashing r0..r3 in the + * thread_info area, so we revert all of that, then + * continue as if this were a normal page fault. + */ + regs->sp &= ~1UL; + regs->regs[0] = ti->unalign_jit_tmp[0]; + regs->regs[1] = ti->unalign_jit_tmp[1]; + regs->regs[2] = ti->unalign_jit_tmp[2]; + regs->regs[3] = ti->unalign_jit_tmp[3]; + write &= 1; + } else { + pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n", + current->comm, current->pid, pc, address); + show_regs(regs); + do_group_exit(SIGKILL); + return; + } + } +#else /* This case should have been handled by do_page_fault_ics(). 
*/ BUG_ON(write & ~1); +#endif #if CHIP_HAS_TILE_DMA() /* @@ -713,10 +786,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: -#endif is_page_fault = 1; break; @@ -732,7 +801,8 @@ void do_page_fault(struct pt_regs *regs, int fault_num, panic("Bad fault number %d in do_page_fault", fault_num); } - if (EX1_PL(regs->ex1) != USER_PL) { +#if CHIP_HAS_TILE_DMA() + if (!user_mode(regs)) { struct async_tlb *async; switch (fault_num) { #if CHIP_HAS_TILE_DMA() @@ -743,12 +813,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, async = &current->thread.dma_async_tlb; break; #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - async = &current->thread.sn_async_tlb; - break; -#endif default: async = NULL; } @@ -775,19 +839,28 @@ void do_page_fault(struct pt_regs *regs, int fault_num, return; } } +#endif handle_page_fault(regs, fault_num, is_page_fault, address, write); } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() /* - * Check an async_tlb structure to see if a deferred fault is waiting, - * and if so pass it to the page-fault code. + * This routine effectively re-issues asynchronous page faults + * when we are returning to user space. */ -static void handle_async_page_fault(struct pt_regs *regs, - struct async_tlb *async) +void do_async_page_fault(struct pt_regs *regs) { + struct async_tlb *async = &current->thread.dma_async_tlb; + + /* + * Clear thread flag early. If we re-interrupt while processing + * code here, we will reset it and recall this routine before + * returning to user space.
+ */ + clear_thread_flag(TIF_ASYNC_TLB); + if (async->fault_num) { /* * Clear async->fault_num before calling the page-fault @@ -801,35 +874,15 @@ static void handle_async_page_fault(struct pt_regs *regs, async->address, async->is_write); } } -#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ - +#endif /* CHIP_HAS_TILE_DMA() */ -/* - * This routine effectively re-issues asynchronous page faults - * when we are returning to user space. - */ -void do_async_page_fault(struct pt_regs *regs) -{ - /* - * Clear thread flag early. If we re-interrupt while processing - * code here, we will reset it and recall this routine before - * returning to user space. - */ - clear_thread_flag(TIF_ASYNC_TLB); - -#if CHIP_HAS_TILE_DMA() - handle_async_page_fault(regs, &current->thread.dma_async_tlb); -#endif -#if CHIP_HAS_SN_PROC() - handle_async_page_fault(regs, &current->thread.sn_async_tlb); -#endif -} void vmalloc_sync_all(void) { #ifdef __tilegx__ /* Currently all L1 kernel pmd's are static and shared. */ - BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START)); + BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) != + pgd_index(VMALLOC_START)); #else /* * Note that races in the updates of insync and start aren't diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 31dbbd9afe4..0dc21829477 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -93,7 +93,7 @@ static DEFINE_PER_CPU(struct kmap_amps, amps); * If we examine it earlier we are exposed to a race where it looks * writable earlier, but becomes immutable before we write the PTE.
*/ -static void kmap_atomic_register(struct page *page, enum km_type type, +static void kmap_atomic_register(struct page *page, int type, unsigned long va, pte_t *ptep, pte_t pteval) { unsigned long flags; @@ -114,7 +114,6 @@ static void kmap_atomic_register(struct page *page, enum km_type type, list_add(&amp->list, &amp_list); set_pte(ptep, pteval); - arch_flush_lazy_mmu_mode(); spin_unlock(&amp_lock); homecache_kpte_unlock(flags); @@ -224,12 +223,12 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) } EXPORT_SYMBOL(kmap_atomic_prot); -void *__kmap_atomic(struct page *page) +void *kmap_atomic(struct page *page) { /* PAGE_NONE is a magic value that tells us to check immutability. */ return kmap_atomic_prot(page, PAGE_NONE); } -EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(kmap_atomic); void __kunmap_atomic(void *kvaddr) { @@ -259,7 +258,6 @@ void __kunmap_atomic(void *kvaddr) BUG_ON(vaddr >= (unsigned long)high_memory); } - arch_flush_lazy_mmu_mode(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index cbe6f4f9eca..33294fdc402 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -30,6 +30,7 @@ #include <linux/cache.h> #include <linux/smp.h> #include <linux/module.h> +#include <linux/hugetlb.h> #include <asm/page.h> #include <asm/sections.h> @@ -42,12 +43,9 @@ #include "migrate.h" -#if CHIP_HAS_COHERENT_LOCAL_CACHE() - /* * The noallocl2 option suppresses all use of the L2 cache to cache - * locally from a remote home. There's no point in using it if we - * don't have coherent local caching, though. + * locally from a remote home. */ static int __write_once noallocl2; static int __init set_noallocl2(char *str) @@ -57,16 +55,6 @@ static int __init set_noallocl2(char *str) } early_param("noallocl2", set_noallocl2); -#else - -#define noallocl2 0 - -#endif - -/* Provide no-op versions of these routines to keep flush_remote() cleaner.
*/ -#define mark_caches_evicted_start() 0 -#define mark_caches_evicted_finish(mask, timestamp) do {} while (0) - /* * Update the irq_stat for cpus that we are going to interrupt @@ -106,7 +94,6 @@ static void hv_flush_update(const struct cpumask *cache_cpumask, * there's never any good reason for hv_flush_remote() to fail. * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally * is the type that Linux wants to pass around anyway. - * - Centralizes the mark_caches_evicted() handling. * - Canonicalizes that lengths of zero make cpumasks NULL. * - Handles deferring TLB flushes for dataplane tiles. * - Tracks remote interrupts in the per-cpu irq_cpustat_t. @@ -125,7 +112,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, HV_Remote_ASID *asids, int asidcount) { int rc; - int timestamp = 0; /* happy compiler */ struct cpumask cache_cpumask_copy, tlb_cpumask_copy; struct cpumask *cache_cpumask, *tlb_cpumask; HV_PhysAddr cache_pa; @@ -156,15 +142,11 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length, asids, asidcount); cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT; - if (cache_control & HV_FLUSH_EVICT_L2) - timestamp = mark_caches_evicted_start(); rc = hv_flush_remote(cache_pa, cache_control, cpumask_bits(cache_cpumask), tlb_va, tlb_length, tlb_pgsize, cpumask_bits(tlb_cpumask), asids, asidcount); - if (cache_control & HV_FLUSH_EVICT_L2) - mark_caches_evicted_finish(cache_cpumask, timestamp); if (rc == 0) return; cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); @@ -179,85 +161,88 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, panic("Unsafe to continue."); } -void flush_remote_page(struct page *page, int order) +static void homecache_finv_page_va(void* va, int home) { - int i, pages = (1 << order); - for (i = 0; i < pages; ++i, ++page) { - void *p = kmap_atomic(page); - int hfh = 0; - int home = 
page_home(page); -#if CHIP_HAS_CBOX_HOME_MAP() - if (home == PAGE_HOME_HASH) - hfh = 1; - else -#endif - BUG_ON(home < 0 || home >= NR_CPUS); - finv_buffer_remote(p, PAGE_SIZE, hfh); - kunmap_atomic(p); + int cpu = get_cpu(); + if (home == cpu) { + finv_buffer_local(va, PAGE_SIZE); + } else if (home == PAGE_HOME_HASH) { + finv_buffer_remote(va, PAGE_SIZE, 1); + } else { + BUG_ON(home < 0 || home >= NR_CPUS); + finv_buffer_remote(va, PAGE_SIZE, 0); } + put_cpu(); } -void homecache_evict(const struct cpumask *mask) +void homecache_finv_map_page(struct page *page, int home) { - flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); + unsigned long flags; + unsigned long va; + pte_t *ptep; + pte_t pte; + + if (home == PAGE_HOME_UNCACHED) + return; + local_irq_save(flags); +#ifdef CONFIG_HIGHMEM + va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() + + (KM_TYPE_NR * smp_processor_id())); +#else + va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id()); +#endif + ptep = virt_to_kpte(va); + pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); + __set_pte(ptep, pte_set_home(pte, home)); + homecache_finv_page_va((void *)va, home); + __pte_clear(ptep); + hv_flush_page(va, PAGE_SIZE); +#ifdef CONFIG_HIGHMEM + kmap_atomic_idx_pop(); +#endif + local_irq_restore(flags); } -/* - * Return a mask of the cpus whose caches currently own these pages. - * The return value is whether the pages are all coherently cached - * (i.e. none are immutable, incoherent, or uncached). 
- */ -static int homecache_mask(struct page *page, int pages, - struct cpumask *home_mask) +static void homecache_finv_page_home(struct page *page, int home) { - int i; - int cached_coherently = 1; - cpumask_clear(home_mask); - for (i = 0; i < pages; ++i) { - int home = page_home(&page[i]); - if (home == PAGE_HOME_IMMUTABLE || - home == PAGE_HOME_INCOHERENT) { - cpumask_copy(home_mask, cpu_possible_mask); - return 0; - } -#if CHIP_HAS_CBOX_HOME_MAP() - if (home == PAGE_HOME_HASH) { - cpumask_or(home_mask, home_mask, &hash_for_home_map); - continue; - } -#endif - if (home == PAGE_HOME_UNCACHED) { - cached_coherently = 0; - continue; - } - BUG_ON(home < 0 || home >= NR_CPUS); - cpumask_set_cpu(home, home_mask); - } - return cached_coherently; + if (!PageHighMem(page) && home == page_home(page)) + homecache_finv_page_va(page_address(page), home); + else + homecache_finv_map_page(page, home); } -/* - * Return the passed length, or zero if it's long enough that we - * believe we should evict the whole L2 cache. - */ -static unsigned long cache_flush_length(unsigned long length) +static inline bool incoherent_home(int home) { - return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length; + return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT; } -/* Flush a page out of whatever cache(s) it is in. 
*/ -void homecache_flush_cache(struct page *page, int order) +static void homecache_finv_page_internal(struct page *page, int force_map) { - int pages = 1 << order; - int length = cache_flush_length(pages * PAGE_SIZE); - unsigned long pfn = page_to_pfn(page); - struct cpumask home_mask; - - homecache_mask(page, pages, &home_mask); - flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0); - sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE); + int home = page_home(page); + if (home == PAGE_HOME_UNCACHED) + return; + if (incoherent_home(home)) { + int cpu; + for_each_cpu(cpu, &cpu_cacheable_map) + homecache_finv_map_page(page, cpu); + } else if (force_map) { + /* Force if, e.g., the normal mapping is migrating. */ + homecache_finv_map_page(page, home); + } else { + homecache_finv_page_home(page, home); + } + sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); } +void homecache_finv_page(struct page *page) +{ + homecache_finv_page_internal(page, 0); +} + +void homecache_evict(const struct cpumask *mask) +{ + flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); +} /* Report the home corresponding to a given PTE. 
*/ static int pte_to_home(pte_t pte) @@ -271,10 +256,8 @@ static int pte_to_home(pte_t pte) return PAGE_HOME_INCOHERENT; case HV_PTE_MODE_UNCACHED: return PAGE_HOME_UNCACHED; -#if CHIP_HAS_CBOX_HOME_MAP() case HV_PTE_MODE_CACHE_HASH_L3: return PAGE_HOME_HASH; -#endif } panic("Bad PTE %#llx\n", pte.val); } @@ -331,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home) HV_PTE_MODE_CACHE_NO_L3); } } else -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); else -#endif pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); pte = hv_pte_set_nc(pte); break; -#if CHIP_HAS_CBOX_HOME_MAP() case PAGE_HOME_HASH: pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); break; -#endif default: BUG_ON(home < 0 || home >= NR_CPUS || @@ -354,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home) break; } -#if CHIP_HAS_NC_AND_NOALLOC_BITS() if (noallocl2) pte = hv_pte_set_no_alloc_l2(pte); @@ -363,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home) hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); } -#endif /* Checking this case here gives a better panic than from the hv. */ BUG_ON(hv_pte_get_mode(pte) == 0); @@ -379,21 +356,16 @@ EXPORT_SYMBOL(pte_set_home); * so they're not suitable for anything but infrequent use. 
*/ -#if CHIP_HAS_CBOX_HOME_MAP() -static inline int initial_page_home(void) { return PAGE_HOME_HASH; } -#else -static inline int initial_page_home(void) { return 0; } -#endif - int page_home(struct page *page) { if (PageHighMem(page)) { - return initial_page_home(); + return PAGE_HOME_HASH; } else { unsigned long kva = (unsigned long)page_address(page); - return pte_to_home(*virt_to_pte(NULL, kva)); + return pte_to_home(*virt_to_kpte(kva)); } } +EXPORT_SYMBOL(page_home); void homecache_change_page_home(struct page *page, int order, int home) { @@ -409,12 +381,13 @@ void homecache_change_page_home(struct page *page, int order, int home) NULL, 0); for (i = 0; i < pages; ++i, kva += PAGE_SIZE) { - pte_t *ptep = virt_to_pte(NULL, kva); + pte_t *ptep = virt_to_kpte(kva); pte_t pteval = *ptep; BUG_ON(!pte_present(pteval) || pte_huge(pteval)); __set_pte(ptep, pte_set_home(pteval, home)); } } +EXPORT_SYMBOL(homecache_change_page_home); struct page *homecache_alloc_pages(gfp_t gfp_mask, unsigned int order, int home) @@ -439,19 +412,25 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, return page; } -void homecache_free_pages(unsigned long addr, unsigned int order) +void __homecache_free_pages(struct page *page, unsigned int order) { - struct page *page; - - if (addr == 0) - return; - - VM_BUG_ON(!virt_addr_valid((void *)addr)); - page = virt_to_page((void *)addr); if (put_page_testzero(page)) { - int pages = (1 << order); - homecache_change_page_home(page, order, initial_page_home()); - while (pages--) - __free_page(page++); + homecache_change_page_home(page, order, PAGE_HOME_HASH); + if (order == 0) { + free_hot_cold_page(page, false); + } else { + init_page_count(page); + __free_pages(page, order); + } + } +} +EXPORT_SYMBOL(__homecache_free_pages); + +void homecache_free_pages(unsigned long addr, unsigned int order) +{ + if (addr != 0) { + VM_BUG_ON(!virt_addr_valid((void *)addr)); + __homecache_free_pages(virt_to_page((void *)addr), order); } } 
+EXPORT_SYMBOL(homecache_free_pages); diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 42cfcba4e1e..e514899e110 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -27,85 +27,129 @@ #include <linux/mman.h> #include <asm/tlb.h> #include <asm/tlbflush.h> +#include <asm/setup.h> + +#ifdef CONFIG_HUGETLB_SUPER_PAGES + +/* + * Provide an additional huge page size (in addition to the regular default + * huge page size) if no "hugepagesz" arguments are specified. + * Note that it must be smaller than the default huge page size so + * that it's possible to allocate them on demand from the buddy allocator. + * You can change this to 64K (on a 16K build), 256K, 1M, or 4M, + * or not define it at all. + */ +#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL) + +/* "Extra" page-size multipliers, one per level of the page table. */ +int huge_shift[HUGE_SHIFT_ENTRIES] = { +#ifdef ADDITIONAL_HUGE_SIZE +#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE) + [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT +#endif +}; + +#endif pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; pud_t *pud; - pte_t *pte = NULL; - /* We do not yet support multiple huge page sizes. */ - BUG_ON(sz != PMD_SIZE); + addr &= -sz; /* Mask off any low bits in the address. 
*/ pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) - pte = (pte_t *) pmd_alloc(mm, pud, addr); - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - return pte; +#ifdef CONFIG_HUGETLB_SUPER_PAGES + if (sz >= PGDIR_SIZE) { + BUG_ON(sz != PGDIR_SIZE && + sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]); + return (pte_t *)pud; + } else { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + if (sz >= PMD_SIZE) { + BUG_ON(sz != PMD_SIZE && + sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD])); + return (pte_t *)pmd; + } + else { + if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) + panic("Unexpected page size %#lx\n", sz); + return pte_alloc_map(mm, NULL, pmd, addr); + } + } +#else + BUG_ON(sz != PMD_SIZE); + return (pte_t *) pmd_alloc(mm, pud, addr); +#endif } -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +static pte_t *get_pte(pte_t *base, int index, int level) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) - pmd = pmd_offset(pud, addr); + pte_t *ptep = base + index; +#ifdef CONFIG_HUGETLB_SUPER_PAGES + if (!pte_present(*ptep) && huge_shift[level] != 0) { + unsigned long mask = -1UL << huge_shift[level]; + pte_t *super_ptep = base + (index & mask); + pte_t pte = *super_ptep; + if (pte_present(pte) && pte_super(pte)) + ptep = super_ptep; } - return (pte_t *) pmd; +#endif + return ptep; } -#ifdef HUGETLB_TEST -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { - unsigned long start = address; - int length = 1; - int nr; - struct page *page; - struct vm_area_struct *vma; - - vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma)) - return ERR_PTR(-EINVAL); - - pte = huge_pte_offset(mm, address); - - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); - - page = 
&pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; - - WARN_ON(!PageHead(page)); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; +#ifdef CONFIG_HUGETLB_SUPER_PAGES + pte_t *pte; +#endif - return page; -} + /* Get the top-level page table entry. */ + pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0); -int pmd_huge(pmd_t pmd) -{ - return 0; -} + /* We don't have four levels. */ + pud = pud_offset(pgd, addr); +#ifndef __PAGETABLE_PUD_FOLDED +# error support fourth page table level +#endif + if (!pud_present(*pud)) + return NULL; + + /* Check for an L0 huge PTE, if we have three levels. */ +#ifndef __PAGETABLE_PMD_FOLDED + if (pud_huge(*pud)) + return (pte_t *)pud; + + pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud), + pmd_index(addr), 1); + if (!pmd_present(*pmd)) + return NULL; +#else + pmd = pmd_offset(pud, addr); +#endif -int pud_huge(pud_t pud) -{ - return 0; -} + /* Check for an L1 huge PTE. */ + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + +#ifdef CONFIG_HUGETLB_SUPER_PAGES + /* Check for an L2 huge PTE. 
*/ + pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2); + if (!pte_present(*pte)) + return NULL; + if (pte_super(*pte)) + return pte; +#endif -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ return NULL; } -#else - struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { @@ -149,50 +193,21 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -#endif - #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; - } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. 
- */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, @@ -200,92 +215,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* don't allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or can't fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); - do { - /* - * Lookup failure means no vma is above this address, - * i.e. 
return with success: - */ - vma = find_vma_prev(mm, addr, &prev_vma); - if (!vma) { - return addr; - break; - } + struct vm_unmapped_area_info info; + unsigned long addr; - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - mm->free_area_cache = addr; - return addr; - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); - - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. 
*/ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = hugetlb_get_unmapped_area_bottomup(file, addr0, - len, pgoff, flags); - - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } @@ -322,21 +275,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return hugetlb_get_unmapped_area_topdown(file, addr, len, pgoff, flags); } +#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ -static __init int setup_hugepagesz(char *opt) +#ifdef CONFIG_HUGETLB_SUPER_PAGES +static __init int __setup_hugepagesz(unsigned long ps) { - unsigned long ps = memparse(opt, &opt); - if (ps == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (ps == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + int log_ps = __builtin_ctzl(ps); + int level, base_shift; + + if ((1UL << log_ps) != ps || (log_ps & 1) != 0) { + pr_warn("Not enabling %ld byte huge pages;" + " must be a power of four.\n", ps); + return -EINVAL; + } + + if (ps > 64*1024*1024*1024UL) { + pr_warn("Not enabling %ld MB huge pages;" + " largest legal value is 64 GB .\n", ps >> 20); + return -EINVAL; + } else if (ps >= PUD_SIZE) { + static long hv_jpage_size; + if (hv_jpage_size == 0) + hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO); + if (hv_jpage_size != PUD_SIZE) { + pr_warn("Not enabling >= %ld MB huge pages:" + " hypervisor reports size %ld\n", + PUD_SIZE >> 20, hv_jpage_size); + return -EINVAL; + } + level = 0; + base_shift = PUD_SHIFT; + } else if (ps >= PMD_SIZE) { + level = 1; + base_shift = PMD_SHIFT; + } else if (ps > PAGE_SIZE) { + level = 2; + base_shift = PAGE_SHIFT; } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", - ps >> 20); - return 0; + pr_err("hugepagesz: huge page 
size %ld too small\n", ps); + return -EINVAL; + } + + if (log_ps != base_shift) { + int shift_val = log_ps - base_shift; + if (huge_shift[level] != 0) { + int old_shift = base_shift + huge_shift[level]; + pr_warn("Not enabling %ld MB huge pages;" + " already have size %ld MB.\n", + ps >> 20, (1UL << old_shift) >> 20); + return -EINVAL; + } + if (hv_set_pte_super_shift(level, shift_val) != 0) { + pr_warn("Not enabling %ld MB huge pages;" + " no hypervisor support.\n", ps >> 20); + return -EINVAL; + } + printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20); + huge_shift[level] = shift_val; + } + + hugetlb_add_hstate(log_ps - PAGE_SHIFT); + + return 0; +} + +static bool saw_hugepagesz; + +static __init int setup_hugepagesz(char *opt) +{ + if (!saw_hugepagesz) { + saw_hugepagesz = true; + memset(huge_shift, 0, sizeof(huge_shift)); } - return 1; + return __setup_hugepagesz(memparse(opt, NULL)); } __setup("hugepagesz=", setup_hugepagesz); -#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ +#ifdef ADDITIONAL_HUGE_SIZE +/* + * Provide an additional huge page size if no "hugepagesz" args are given. + * In that case, all the cores have properly set up their hv super_shift + * already, but we need to notify the hugetlb code to enable the + * new huge page size from the Linux point of view. 
+ */ +static __init int add_default_hugepagesz(void) +{ + if (!saw_hugepagesz) { + BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE || + ADDITIONAL_HUGE_SIZE <= PAGE_SIZE); + BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) != + ADDITIONAL_HUGE_SIZE); + BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1); + hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT); + } + return 0; +} +arch_initcall(add_default_hugepagesz); +#endif + +#endif /* CONFIG_HUGETLB_SUPER_PAGES */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index d6e87fda2fb..bfb3127b4df 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -38,7 +38,6 @@ #include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/dma.h> @@ -60,8 +59,6 @@ unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; EXPORT_SYMBOL(VMALLOC_RESERVE); #endif -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - /* Create an L2 page table */ static pte_t * __init alloc_pte(void) { @@ -85,7 +82,7 @@ static int num_l2_ptes[MAX_NUMNODES]; static void init_prealloc_ptes(int node, int pages) { - BUG_ON(pages & (HV_L2_ENTRIES-1)); + BUG_ON(pages & (PTRS_PER_PTE - 1)); if (pages) { num_l2_ptes[node] = pages; l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t), @@ -109,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn) */ static int initial_heap_home(void) { -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) return PAGE_HOME_HASH; -#endif return smp_processor_id(); } @@ -134,14 +129,9 @@ static void __init assign_pte(pmd_t *pmd, pte_t *page_table) #ifdef __tilegx__ -#if HV_L1_SIZE != HV_L2_SIZE -# error Rework assumption that L1 and L2 page tables are same size. -#endif - -/* Since pmd_t arrays and pte_t arrays are the same size, just use casts. 
*/ static inline pmd_t *alloc_pmd(void) { - return (pmd_t *)alloc_pte(); + return __alloc_bootmem(L1_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0); } static inline void assign_pmd(pud_t *pud, pmd_t *pmd) @@ -158,7 +148,21 @@ void __init shatter_pmd(pmd_t *pmd) assign_pte(pmd, pte); } -#ifdef CONFIG_HIGHMEM +#ifdef __tilegx__ +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); + if (pud_none(*pud)) + assign_pmd(pud, alloc_pmd()); + return pmd_offset(pud, va); +} +#else +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); +} +#endif + /* * This function initializes a certain range of kernel virtual memory * with new bootmem page tables, everywhere page tables are missing in @@ -171,34 +175,24 @@ void __init shatter_pmd(pmd_t *pmd) * checking the pgd every time. */ static void __init page_table_range_init(unsigned long start, - unsigned long end, pgd_t *pgd_base) + unsigned long end, pgd_t *pgd) { - pgd_t *pgd; - int pgd_idx; unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr); + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) { + pmd_t *pmd = get_pmd(pgd, vaddr); if (pmd_none(*pmd)) assign_pte(pmd, alloc_pte()); - vaddr += PMD_SIZE; } } -#endif /* CONFIG_HIGHMEM */ - -#if CHIP_HAS_CBOX_HOME_MAP() static int __initdata ktext_hash = 1; /* .text pages */ static int __initdata kdata_hash = 1; /* .data and .bss pages */ int __write_once hash_default = 1; /* kernel allocator pages */ EXPORT_SYMBOL(hash_default); int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ -#endif /* CHIP_HAS_CBOX_HOME_MAP */ /* * CPUs to use to for striping the pages of kernel 
data. If hash-for-home @@ -216,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */ static pgprot_t __init construct_pgprot(pgprot_t prot, int home) { prot = pte_set_home(prot, home); -#if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_IMMUTABLE) { if (ktext_hash) prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); else prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); } -#endif return prot; } @@ -235,40 +227,28 @@ static pgprot_t __init init_pgprot(ulong address) { int cpu; unsigned long page; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; -#if CHIP_HAS_CBOX_HOME_MAP() /* For kdata=huge, everything is just hash-for-home. */ if (kdata_huge) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif /* We map the aliased pages of permanent text inaccessible. */ if (address < (ulong) _sinittext - CODE_DELTA) return PAGE_NONE; - /* - * We map read-only data non-coherent for performance. We could - * use neighborhood caching on TILE64, but it's not clear it's a win. - */ + /* We map read-only data non-coherent for performance. */ if ((address >= (ulong) __start_rodata && address < (ulong) __end_rodata) || address == (ulong) empty_zero_page) { return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); } - /* As a performance optimization, keep the boot init stack here. */ - if (address >= (ulong)&init_thread_union && - address < (ulong)&init_thread_union + THREAD_SIZE) - return construct_pgprot(PAGE_KERNEL, smp_processor_id()); - #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. 
*/ if (address == (ulong) atomic_locks) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); #endif -#endif /* * Everything else that isn't data or bss is heap, so mark it @@ -286,28 +266,18 @@ static pgprot_t __init init_pgprot(ulong address) if (address >= (ulong) _end || address < (ulong) _einitdata) return construct_pgprot(PAGE_KERNEL, initial_heap_home()); -#if CHIP_HAS_CBOX_HOME_MAP() /* Use hash-for-home if requested for data/bss. */ if (kdata_hash) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif - - /* - * Make the w1data homed like heap to start with, to avoid - * making it part of the page-striped data area when we're just - * going to convert it to read-only soon anyway. - */ - if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end) - return construct_pgprot(PAGE_KERNEL, initial_heap_home()); /* * Otherwise we just hand out consecutive cpus. To avoid * requiring this function to hold state, we just walk forward from - * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach - * the requested address, while walking cpu home around kdata_mask. - * This is typically no more than a dozen or so iterations. + * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to + * reach the requested address, while walking cpu home around + * kdata_mask. This is typically no more than a dozen or so iterations. 
*/ - page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK; + page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK; BUG_ON(address < page || address >= (ulong)_end); cpu = cpumask_first(&kdata_mask); for (; page < address; page += PAGE_SIZE) { @@ -317,11 +287,9 @@ static pgprot_t __init init_pgprot(ulong address) if (page == (ulong)empty_zero_page) continue; #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() if (page == (ulong)atomic_locks) continue; #endif -#endif cpu = cpumask_next(cpu, &kdata_mask); if (cpu == NR_CPUS) cpu = cpumask_first(&kdata_mask); @@ -364,7 +332,7 @@ static int __init setup_ktext(char *str) ktext_arg_seen = 1; - /* Default setting on Tile64: use a huge page */ + /* Default setting: use a huge page */ if (strcmp(str, "huge") == 0) pr_info("ktext: using one huge locally cached page\n"); @@ -410,28 +378,11 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot) { if (!ktext_nocache) prot = hv_pte_set_nc(prot); -#if CHIP_HAS_NC_AND_NOALLOC_BITS() else prot = hv_pte_set_no_alloc_l2(prot); -#endif return prot; } -#ifndef __tilegx__ -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); -} -#else -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); - if (pud_none(*pud)) - assign_pmd(pud, alloc_pmd()); - return pmd_offset(pud, va); -} -#endif - /* Temporary page table we use for staging. 
*/ static pgd_t pgtables[PTRS_PER_PGD] __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); @@ -452,6 +403,7 @@ static pgd_t pgtables[PTRS_PER_PGD] */ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) { + unsigned long long irqmask; unsigned long address, pfn; pmd_t *pmd; pte_t *pte; @@ -460,7 +412,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) struct cpumask kstripe_mask; int rc, i; -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_arg_seen && ktext_hash) { pr_warning("warning: \"ktext\" boot argument ignored" " if \"kcache_hash\" sets up text hash-for-home\n"); @@ -477,7 +428,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) " kcache_hash=all or =allbutstack\n"); kdata_huge = 0; } -#endif /* * Set up a mask for cpus to use for kernel striping. @@ -558,8 +508,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } } - address = MEM_SV_INTRPT; + address = MEM_SV_START; pmd = get_pmd(pgtables, address); + pfn = 0; /* code starts at PA 0 */ if (ktext_small) { /* Allocate an L2 PTE for the kernel text */ int cpu = 0; @@ -581,11 +532,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) prot = ktext_set_nocache(prot); } - BUG_ON(address != (unsigned long)_stext); - pfn = 0; /* code starts at PA 0 */ - pte = alloc_pte(); - for (pte_ofs = 0; address < (unsigned long)_einittext; - pfn++, pte_ofs++, address += PAGE_SIZE) { + BUG_ON(address != (unsigned long)_text); + pte = NULL; + for (; address < (unsigned long)_einittext; + pfn++, address += PAGE_SIZE) { + pte_ofs = pte_index(address); + if (pte_ofs == 0) { + if (pte) + assign_pte(pmd++, pte); + pte = alloc_pte(); + } if (!ktext_local) { prot = set_remote_cache_cpu(prot, cpu); cpu = cpumask_next(cpu, &ktext_mask); @@ -594,17 +550,16 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } pte[pte_ofs] = pfn_pte(pfn, prot); } - assign_pte(pmd, pte); + if (pte) + assign_pte(pmd, pte); } else { pte_t pteval = pfn_pte(0, 
PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_hash) { pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_HASH_L3); pteval = ktext_set_nocache(pteval); } else -#endif /* CHIP_HAS_CBOX_HOME_MAP() */ if (cpumask_weight(&ktext_mask) == 1) { pteval = set_remote_cache_cpu(pteval, cpumask_first(&ktext_mask)); @@ -617,7 +572,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) else pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_NO_L3); - *(pte_t *)pmd = pteval; + for (; address < (unsigned long)_einittext; + pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) + *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); } /* Set swapper_pgprot here so it is flushed to memory right away. */ @@ -632,10 +589,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) * - install pgtables[] as the real page table * - flush the TLB so the new page table takes effect */ + irqmask = interrupt_mask_save_mask(); + interrupt_mask_set_mask(-1ULL); rc = flush_and_install_context(__pa(pgtables), init_pgprot((unsigned long)pgtables), __get_cpu_var(current_asid), cpumask_bits(my_cpu_mask)); + interrupt_mask_restore_mask(irqmask); BUG_ON(rc != 0); /* Copy the page table back to the normal swapper_pg_dir. 
*/ @@ -698,6 +658,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) #endif /* CONFIG_HIGHMEM */ +#ifndef CONFIG_64BIT static void __init init_free_pfn_range(unsigned long start, unsigned long end) { unsigned long pfn; @@ -727,7 +688,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end) } init_page_count(page); __free_pages(page, order); - totalram_pages += count; + adjust_managed_page_count(page, count); page += count; pfn += count; @@ -740,16 +701,15 @@ static void __init set_non_bootmem_pages_init(void) for_each_zone(z) { unsigned long start, end; int nid = z->zone_pgdat->node_id; +#ifdef CONFIG_HIGHMEM int idx = zone_idx(z); +#endif start = z->zone_start_pfn; - if (start == 0) - continue; /* bootmem */ end = start + z->spanned_pages; - if (idx == ZONE_NORMAL) { - BUG_ON(start != node_start_pfn[nid]); - start = node_free_pfn[nid]; - } + start = max(start, node_free_pfn[nid]); + start = max(start, max_low_pfn); + #ifdef CONFIG_HIGHMEM if (idx == ZONE_HIGHMEM) totalhigh_pages += z->spanned_pages; @@ -770,6 +730,7 @@ static void __init set_non_bootmem_pages_init(void) init_free_pfn_range(start, end); } } +#endif /* * paging_init() sets up the page tables - note that all of lowmem is @@ -777,9 +738,6 @@ static void __init set_non_bootmem_pages_init(void) */ void __init paging_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long vaddr, end; -#endif #ifdef __tilegx__ pud_t *pud; #endif @@ -787,14 +745,11 @@ void __init paging_init(void) kernel_physical_mapping_init(pgd_base); + /* Fixed mappings, only the page table structure has to be created. 
*/ + page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1), + FIXADDR_TOP, pgd_base); + #ifdef CONFIG_HIGHMEM - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; - page_table_range_init(vaddr, end, pgd_base); permanent_kmaps_init(pgd_base); #endif @@ -806,7 +761,7 @@ void __init paging_init(void) * changing init_mm once we get up and running, and there's no * need for e.g. vmalloc_sync_all(). */ - BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END)); + BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END - 1)); pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START); assign_pmd(pud, alloc_pmd()); #endif @@ -831,15 +786,13 @@ static void __init set_max_mapnr_init(void) void __init mem_init(void) { - int codesize, datasize, initsize; int i; #ifndef __tilegx__ void *last; #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM @@ -857,24 +810,14 @@ void __init mem_init(void) set_max_mapnr_init(); /* this will put all bootmem onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); +#ifndef CONFIG_64BIT /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ set_non_bootmem_pages_init(); +#endif - codesize = (unsigned long)&_etext - (unsigned long)&_text; - datasize = (unsigned long)&_end - (unsigned long)&_sdata; - initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; - initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; - - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + 
mem_init_print_info(NULL); /* * In debug mode, dump some interesting memory mappings. @@ -885,10 +828,6 @@ void __init mem_init(void) printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); -#endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ @@ -944,6 +883,14 @@ int remove_memory(u64 start, u64 size) { return -EINVAL; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* TODO */ + return -EBUSY; +} +#endif #endif struct kmem_cache *pgd_cache; @@ -955,26 +902,6 @@ void __init pgtable_cache_init(void) panic("pgtable_cache_init(): Cannot create pgd cache"); } -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() -/* - * The __w1data area holds data that is only written during initialization, - * and is read-only and thus freely cacheable thereafter. Fix the page - * table entries that cover that region accordingly. - */ -static void mark_w1data_ro(void) -{ - /* Loop over page table entries */ - unsigned long addr = (unsigned long)__w1data_begin; - BUG_ON((addr & (PAGE_SIZE-1)) != 0); - for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { - unsigned long pfn = kaddr_to_pfn((void *)addr); - pte_t *ptep = virt_to_pte(NULL, addr); - BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ - set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); - } -} -#endif - #ifdef CONFIG_DEBUG_PAGEALLOC static long __write_once initfree; #else @@ -985,7 +912,7 @@ static long __write_once initfree = 1; static int __init set_initfree(char *str) { long val; - if (strict_strtol(str, 0, &val) == 0) { + if (kstrtol(str, 0, &val) == 0) { initfree = val; pr_info("initfree: %s free init pages\n", initfree ? 
"will" : "won't"); @@ -1014,7 +941,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) */ int pfn = kaddr_to_pfn((void *)addr); struct page *page = pfn_to_page(pfn); - pte_t *ptep = virt_to_pte(NULL, addr); + pte_t *ptep = virt_to_kpte(addr); if (!initfree) { /* * If debugging page accesses then do not free @@ -1025,31 +952,24 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) pte_clear(&init_mm, addr, ptep); continue; } - __ClearPageReserved(page); - init_page_count(page); if (pte_huge(*ptep)) BUG_ON(!kdata_huge); else set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; + free_reserved_page(page); } pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } void free_initmem(void) { - const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; + const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET; /* - * Evict the dirty initdata on the boot cpu, evict the w1data - * wherever it's homed, and evict all the init code everywhere. - * We are guaranteed that no one will touch the init pages any - * more, and although other cpus may be touching the w1data, - * we only actually change the caching on tile64, which won't - * be keeping local copies in the other tiles' caches anyway. + * Evict the cache on all cores to avoid incoherence. + * We are guaranteed that no one will touch the init pages any more. */ homecache_evict(&cpu_cacheable_map); @@ -1060,26 +980,11 @@ void free_initmem(void) /* * Free the pages mapped from 0xc0000000 that correspond to code - * pages from MEM_SV_INTRPT that we won't use again after init. + * pages from MEM_SV_START that we won't use again after init. 
*/ free_init_pages("unused kernel text", (unsigned long)_sinittext - text_delta, (unsigned long)_einittext - text_delta); - -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() - /* - * Upgrade the .w1data section to globally cached. - * We don't do this on tilepro, since the cache architecture - * pretty much makes it irrelevant, and in any case we end - * up having racing issues with other tiles that may touch - * the data after we flush the cache but before we update - * the PTEs and flush the TLBs, causing sharer shootdowns - * later. Even though this is to clean data, it seems like - * an unnecessary complication. - */ - mark_w1data_ro(); -#endif - /* Do a global TLB flush so everyone sees the changes. */ flush_tlb_all(); } diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h index cd45a0837fa..91683d97917 100644 --- a/arch/tile/mm/migrate.h +++ b/arch/tile/mm/migrate.h @@ -24,6 +24,9 @@ /* * This function is used as a helper when setting up the initial * page table (swapper_pg_dir). + * + * You must mask ALL interrupts prior to invoking this code, since + * you can't legally touch the stack during the cache flush. */ extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, @@ -39,6 +42,9 @@ extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, * * Note that any non-NULL pointers must not point to the page that * is handled by the stack_pte itself. + * + * You must mask ALL interrupts prior to invoking this code, since + * you can't legally touch the stack during the cache flush. 
*/ extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va, size_t length, pte_t *stack_ptep, diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index ac01a7cdf77..772085491bf 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S @@ -40,8 +40,7 @@ #define FRAME_R32 16 #define FRAME_R33 20 #define FRAME_R34 24 -#define FRAME_R35 28 -#define FRAME_SIZE 32 +#define FRAME_SIZE 28 @@ -66,12 +65,11 @@ #define r_my_cpumask r5 /* Locals (callee-save); must not be more than FRAME_xxx above. */ -#define r_save_ics r30 -#define r_context_lo r31 -#define r_context_hi r32 -#define r_access_lo r33 -#define r_access_hi r34 -#define r_asid r35 +#define r_context_lo r30 +#define r_context_hi r31 +#define r_access_lo r32 +#define r_access_hi r33 +#define r_asid r34 STD_ENTRY(flush_and_install_context) /* @@ -104,11 +102,7 @@ STD_ENTRY(flush_and_install_context) sw r_tmp, r33 addi r_tmp, sp, FRAME_R34 } - { - sw r_tmp, r34 - addi r_tmp, sp, FRAME_R35 - } - sw r_tmp, r35 + sw r_tmp, r34 /* Move some arguments to callee-save registers. */ { @@ -121,13 +115,6 @@ STD_ENTRY(flush_and_install_context) } move r_asid, r_asid_in - /* Disable interrupts, since we can't use our stack. */ - { - mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION - movei r_tmp, 1 - } - mtspr INTERRUPT_CRITICAL_SECTION, r_tmp - /* First, flush our L2 cache. */ { move r0, zero /* cache_pa */ @@ -149,7 +136,7 @@ STD_ENTRY(flush_and_install_context) move r8, zero /* asids */ move r9, zero /* asidcount */ } - jal hv_flush_remote + jal _hv_flush_remote bnz r0, .Ldone /* Now install the new page table. */ @@ -163,9 +150,9 @@ STD_ENTRY(flush_and_install_context) } { move r4, r_asid - movei r5, HV_CTX_DIRECTIO + moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG } - jal hv_install_context + jal _hv_install_context bnz r0, .Ldone /* Finally, flush the TLB. */ @@ -175,9 +162,6 @@ STD_ENTRY(flush_and_install_context) } .Ldone: - /* Reset interrupts back how they were before. 
*/ - mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics - /* Restore the callee-saved registers and return. */ addli lr, sp, FRAME_SIZE { @@ -202,10 +186,6 @@ STD_ENTRY(flush_and_install_context) } { lw r34, r_tmp - addli r_tmp, sp, FRAME_R35 - } - { - lw r35, r_tmp addi sp, sp, FRAME_SIZE } jrp lr diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S new file mode 100644 index 00000000000..a49eee38f87 --- /dev/null +++ b/arch/tile/mm/migrate_64.S @@ -0,0 +1,167 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * This routine is a helper for migrating the home of a set of pages to + * a new cpu. See the documentation in homecache.c for more information. + */ + +#include <linux/linkage.h> +#include <linux/threads.h> +#include <asm/page.h> +#include <asm/thread_info.h> +#include <asm/types.h> +#include <asm/asm-offsets.h> +#include <hv/hypervisor.h> + + .text + +/* + * First, some definitions that apply to all the code in the file. + */ + +/* Locals (caller-save) */ +#define r_tmp r10 +#define r_save_sp r11 + +/* What we save where in the stack frame; must include all callee-saves. 
*/ +#define FRAME_SP 8 +#define FRAME_R30 16 +#define FRAME_R31 24 +#define FRAME_R32 32 +#define FRAME_SIZE 40 + + + + +/* + * On entry: + * + * r0 the new context PA to install (moved to r_context) + * r1 PTE to use for context access (moved to r_access) + * r2 ASID to use for new context (moved to r_asid) + * r3 pointer to cpumask with just this cpu set in it (r_my_cpumask) + */ + +/* Arguments (caller-save) */ +#define r_context_in r0 +#define r_access_in r1 +#define r_asid_in r2 +#define r_my_cpumask r3 + +/* Locals (callee-save); must not be more than FRAME_xxx above. */ +#define r_context r30 +#define r_access r31 +#define r_asid r32 + +/* + * Caller-save locals and frame constants are the same as + * for homecache_migrate_stack_and_flush. + */ + +STD_ENTRY(flush_and_install_context) + /* + * Create a stack frame; we can't touch it once we flush the + * cache until we install the new page table and flush the TLB. + */ + { + move r_save_sp, sp + st sp, lr + addi sp, sp, -FRAME_SIZE + } + addi r_tmp, sp, FRAME_SP + { + st r_tmp, r_save_sp + addi r_tmp, sp, FRAME_R30 + } + { + st r_tmp, r30 + addi r_tmp, sp, FRAME_R31 + } + { + st r_tmp, r31 + addi r_tmp, sp, FRAME_R32 + } + st r_tmp, r32 + + /* Move some arguments to callee-save registers. */ + { + move r_context, r_context_in + move r_access, r_access_in + } + move r_asid, r_asid_in + + /* First, flush our L2 cache. */ + { + move r0, zero /* cache_pa */ + moveli r1, hw2_last(HV_FLUSH_EVICT_L2) /* cache_control */ + } + { + shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) + move r2, r_my_cpumask /* cache_cpumask */ + } + { + shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) + move r3, zero /* tlb_va */ + } + { + move r4, zero /* tlb_length */ + move r5, zero /* tlb_pgsize */ + } + { + move r6, zero /* tlb_cpumask */ + move r7, zero /* asids */ + } + { + move r8, zero /* asidcount */ + jal _hv_flush_remote + } + bnez r0, 1f + + /* Now install the new page table. 
*/ + { + move r0, r_context + move r1, r_access + } + { + move r2, r_asid + moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG + } + jal _hv_install_context + bnez r0, 1f + + /* Finally, flush the TLB. */ + { + movei r0, 0 /* preserve_global */ + jal hv_flush_all + } + +1: /* Restore the callee-saved registers and return. */ + addli lr, sp, FRAME_SIZE + { + ld lr, lr + addli r_tmp, sp, FRAME_R30 + } + { + ld r30, r_tmp + addli r_tmp, sp, FRAME_R31 + } + { + ld r31, r_tmp + addli r_tmp, sp, FRAME_R32 + } + { + ld r32, r_tmp + addi sp, sp, FRAME_SIZE + } + jrp lr + STD_ENDPROC(flush_and_install_context) diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index f96f4cec602..851a94e6ae5 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -58,18 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm) #else int is_32bit = 0; #endif + unsigned long random_factor = 0UL; + + /* + * 8 bits of randomness in 32bit mmaps, 24 address space bits + * 12 bits of randomness in 64bit mmaps, 28 address space bits + */ + if (current->flags & PF_RANDOMIZE) { + if (is_32bit) + random_factor = get_random_int() % (1<<8); + else + random_factor = get_random_int() % (1<<12); + + random_factor <<= PAGE_SHIFT; + } /* * Use standard layout if the expected stack growth is unlimited * or we are running native 64 bits. */ - if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; +} diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index de7d8e21e01..5e86eac4bfa 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -27,7 +27,6 @@ #include <linux/vmalloc.h> #include <linux/smp.h> -#include <asm/system.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> @@ -62,7 +61,7 @@ void show_mem(unsigned int filter) global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE), global_page_state(NR_FILE_PAGES), - nr_swap_pages); + get_nr_swap_pages()); for_each_zone(zone) { unsigned long flags, order, total = 0, largest_order = -1; @@ -84,64 +83,6 @@ void show_mem(unsigned int filter) } } -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); - - /* - * It's enough to flush this one mapping. - * This appears conservative since it is only called - * from __set_fixmap. 
- */ - local_flush_tlb_page(NULL, vaddr, PAGE_SIZE); -} - -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); -} - -#if defined(CONFIG_HIGHPTE) -pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) -{ - pte_t *pte = kmap_atomic(pmd_page(*dir)) + - (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK; - return &pte[pte_index(address)]; -} -#endif - /** * shatter_huge_page() - ensure a given address is mapped by a small page. * @@ -178,23 +119,19 @@ void shatter_huge_page(unsigned long addr) if (!pmd_huge_page(*pmd)) return; - /* - * Grab the pgd_lock, since we may need it to walk the pgd_list, - * and since we need some kind of lock here to avoid races. - */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock_irqsave(&init_mm.page_table_lock, flags); if (!pmd_huge_page(*pmd)) { /* Lost the race to convert the huge page. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); return; } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ + spin_lock(&pgd_lock); list_for_each(pos, &pgd_list) { pmd_t *copy_pmd; pgd = list_to_pgd(pos) + pgd_index(addr); @@ -202,6 +139,7 @@ void shatter_huge_page(unsigned long addr) copy_pmd = pmd_offset(pud, addr); __set_pmd(copy_pmd, *pmd); } + spin_unlock(&pgd_lock); #endif /* Tell every cpu to notice the change. */ @@ -209,7 +147,7 @@ void shatter_huge_page(unsigned long addr) cpu_possible_mask, NULL, 0); /* Hold the lock until the TLB flush is finished to avoid races. 
*/ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); } /* @@ -218,9 +156,13 @@ void shatter_huge_page(unsigned long addr) * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. - * The locking scheme was chosen on the basis of manfred's - * recommendations and having no core impact whatsoever. - * -- wli + * + * The lock is always taken with interrupts disabled, unlike on x86 + * and other platforms, because we need to take the lock in + * shatter_huge_page(), which may be called from an interrupt context. + * We are not at risk from the tlbflush IPI deadlock that was seen on + * x86, since we use the flush_remote() API to have the hypervisor do + * the TLB flushes regardless of irq disabling. */ DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -288,35 +230,32 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER) -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, + int order) { gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; struct page *p; -#if L2_USER_PGTABLE_ORDER > 0 int i; -#endif - -#ifdef CONFIG_HIGHPTE - flags |= __GFP_HIGHMEM; -#endif p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); if (p == NULL) return NULL; -#if L2_USER_PGTABLE_ORDER > 0 + if (!pgtable_page_ctor(p)) { + __free_pages(p, L2_USER_PGTABLE_ORDER); + return NULL; + } + /* * Make every page have a page_count() of one, not just the first. * We don't use __GFP_COMP since it doesn't look like it works * correctly with tlb_remove_page(). 
*/ - for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + for (i = 1; i < order; ++i) { init_page_count(p+i); inc_zone_page_state(p+i, NR_PAGETABLE); } -#endif - pgtable_page_ctor(p); return p; } @@ -325,28 +264,28 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) * process). We have to correct whatever pte_alloc_one() did before * returning the pages to the allocator. */ -void pte_free(struct mm_struct *mm, struct page *p) +void pgtable_free(struct mm_struct *mm, struct page *p, int order) { int i; pgtable_page_dtor(p); __free_page(p); - for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + for (i = 1; i < order; ++i) { __free_page(p+i); dec_zone_page_state(p+i, NR_PAGETABLE); } } -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, - unsigned long address) +void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address, int order) { int i; pgtable_page_dtor(pte); tlb_remove_page(tlb, pte); - for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + for (i = 1; i < order; ++i) { tlb_remove_page(tlb, pte + i); dec_zone_page_state(pte + i, NR_PAGETABLE); } @@ -389,6 +328,17 @@ void ptep_set_wrprotect(struct mm_struct *mm, #endif +/* + * Return a pointer to the PTE that corresponds to the given + * address in the given page table. A NULL page table just uses + * the standard kernel page table; the preferred API in this case + * is virt_to_kpte(). + * + * The returned pointer can point to a huge page in other levels + * of the page table than the bottom, if the huge page is present + * in the page table. For bottom-level PTEs, the returned pointer + * can point to a PTE that is either present or not. 
+ */ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) { pgd_t *pgd; @@ -402,13 +352,23 @@ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) pud = pud_offset(pgd, addr); if (!pud_present(*pud)) return NULL; + if (pud_huge_page(*pud)) + return (pte_t *)pud; pmd = pmd_offset(pud, addr); - if (pmd_huge_page(*pmd)) - return (pte_t *)pmd; if (!pmd_present(*pmd)) return NULL; + if (pmd_huge_page(*pmd)) + return (pte_t *)pmd; return pte_offset_kernel(pmd, addr); } +EXPORT_SYMBOL(virt_to_pte); + +pte_t *virt_to_kpte(unsigned long kaddr) +{ + BUG_ON(kaddr < PAGE_OFFSET); + return virt_to_pte(NULL, kaddr); +} +EXPORT_SYMBOL(virt_to_kpte); pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu) { @@ -470,10 +430,18 @@ void __set_pte(pte_t *ptep, pte_t pte) void set_pte(pte_t *ptep, pte_t pte) { - struct page *page = pfn_to_page(pte_pfn(pte)); - - /* Update the home of a PTE if necessary */ - pte = pte_set_home(pte, page_home(page)); + if (pte_present(pte) && + (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { + /* The PTE actually references physical memory. */ + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + /* Update the home of the PTE from the struct page. */ + pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); + } else if (hv_pte_get_mode(pte) == 0) { + /* remap_pfn_range(), etc, must supply PTE mode. */ + panic("set_pte(): out-of-range PFN and mode 0\n"); + } + } __set_pte(ptep, pte); } @@ -481,7 +449,7 @@ void set_pte(pte_t *ptep, pte_t pte) /* Can this mm load a PTE with cached_priority set? 
*/ static inline int mm_is_priority_cached(struct mm_struct *mm) { - return mm->context.priority_cached; + return mm->context.priority_cached != 0; } /* @@ -491,8 +459,8 @@ static inline int mm_is_priority_cached(struct mm_struct *mm) void start_mm_caching(struct mm_struct *mm) { if (!mm_is_priority_cached(mm)) { - mm->context.priority_cached = -1U; - hv_set_caching(-1U); + mm->context.priority_cached = -1UL; + hv_set_caching(-1UL); } } @@ -507,7 +475,7 @@ void start_mm_caching(struct mm_struct *mm) * Presumably we'll come back later and have more luck and clear * the value then; for now we'll just keep the cache marked for priority. */ -static unsigned int update_priority_cached(struct mm_struct *mm) +static unsigned long update_priority_cached(struct mm_struct *mm) { if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) { struct vm_area_struct *vm; @@ -575,20 +543,13 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, addr = area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + free_vm_area(area); return NULL; } return (__force void __iomem *) (offset + (char *)addr); } EXPORT_SYMBOL(ioremap_prot); -/* Map a PCI MMIO bus address into VA space. */ -void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) -{ - panic("ioremap for PCI MMIO is not supported"); -} -EXPORT_SYMBOL(ioremap); - /* Unmap an MMIO VA mapping. */ void iounmap(volatile void __iomem *addr_in) { @@ -606,12 +567,7 @@ void iounmap(volatile void __iomem *addr_in) in parallel. Reuse of the virtual address is prevented by leaving it in the global lists until we're done with it. cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == addr) - break; - } - read_unlock(&vmlist_lock); + p = find_vm_area((void *)addr); if (!p) { pr_err("iounmap: bad address %p\n", addr); |
