aboutsummaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARM.h4
-rw-r--r--lib/Target/ARM/ARM.td16
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp22
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp25
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp33
-rw-r--r--lib/Target/ARM/ARMCallingConv.h2
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp20
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp29
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h6
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp143
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp10
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp121
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp76
-rw-r--r--lib/Target/ARM/ARMISelLowering.h8
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp8
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--lib/Target/ARM/ARMNaClRewritePass.cpp2
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td3
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp3
-rw-r--r--lib/Target/ARM/ARMSubtarget.h8
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp15
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h22
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp159
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmLexer.cpp134
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp186
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/CMakeLists.txt2
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp16
-rwxr-xr-xlib/Target/ARM/LICENSE.TXT47
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp28
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp16
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp201
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp18
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp14
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/Makefile2
-rw-r--r--lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp4
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp17
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp250
-rw-r--r--lib/Target/CMakeLists.txt1
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp26
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h2
-rw-r--r--lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp8
-rw-r--r--[-rwxr-xr-x]lib/Target/Hexagon/HexagonAsmPrinter.h0
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td9
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td159
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td364
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h13
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp12
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp2
-rw-r--r--lib/Target/LLVMBuild.txt2
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp112
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp49
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt1
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp6
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h5
-rw-r--r--lib/Target/MBlaze/InstPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp8
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp8
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp10
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp10
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp6
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp3
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h10
-rw-r--r--lib/Target/MBlaze/MBlazeTargetObjectFile.cpp6
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp4
-rw-r--r--lib/Target/MBlaze/Makefile3
-rw-r--r--lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp2
-rw-r--r--lib/Target/MSP430/InstPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp6
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp10
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp16
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h1
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h11
-rw-r--r--lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--lib/Target/Mangler.cpp6
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp282
-rw-r--r--lib/Target/Mips/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp27
-rw-r--r--lib/Target/Mips/InstPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp5
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp14
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp8
-rw-r--r--lib/Target/Mips/Makefile2
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp48
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp139
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h28
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td42
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp36
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h16
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td311
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp8
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp4
-rw-r--r--lib/Target/Mips/MipsCondMov.td127
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td5
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td6
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp8
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp84
-rw-r--r--lib/Target/Mips/MipsISelLowering.h5
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td420
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td553
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td928
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp2
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp2
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp2
-rw-r--r--lib/Target/Mips/MipsNaClRewritePass.cpp2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp4
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td45
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp10
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp6
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp13
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h23
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp6
-rw-r--r--lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTX.h4
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp26
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp18
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h2
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h4
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h12
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp10
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h8
-rw-r--r--lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp2
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp77
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp86
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp31
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h3
-rw-r--r--lib/Target/PowerPC/PPC.h4
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp225
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp9
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp26
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp110
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h66
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td73
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td13
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp6
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp11
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp3
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h11
-rw-r--r--lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--lib/Target/R600/AMDGPU.h49
-rw-r--r--lib/Target/R600/AMDGPU.td40
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp138
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h44
-rw-r--r--lib/Target/R600/AMDGPUCodeEmitter.h49
-rw-r--r--lib/Target/R600/AMDGPUConvertToISA.cpp62
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp417
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h144
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp257
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h148
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td74
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td190
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td62
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp83
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.h34
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.cpp51
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h63
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.td22
-rw-r--r--lib/Target/R600/AMDGPUStructurizeCFG.cpp714
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp87
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h65
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp142
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h70
-rw-r--r--lib/Target/R600/AMDIL.h106
-rw-r--r--lib/Target/R600/AMDIL7XXDevice.cpp115
-rw-r--r--lib/Target/R600/AMDIL7XXDevice.h72
-rw-r--r--lib/Target/R600/AMDILBase.td85
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp3049
-rw-r--r--lib/Target/R600/AMDILDevice.cpp124
-rw-r--r--lib/Target/R600/AMDILDevice.h117
-rw-r--r--lib/Target/R600/AMDILDeviceInfo.cpp94
-rw-r--r--lib/Target/R600/AMDILDeviceInfo.h88
-rw-r--r--lib/Target/R600/AMDILDevices.h19
-rw-r--r--lib/Target/R600/AMDILEvergreenDevice.cpp169
-rw-r--r--lib/Target/R600/AMDILEvergreenDevice.h93
-rw-r--r--lib/Target/R600/AMDILFrameLowering.cpp47
-rw-r--r--lib/Target/R600/AMDILFrameLowering.h40
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp485
-rw-r--r--lib/Target/R600/AMDILISelLowering.cpp651
-rw-r--r--lib/Target/R600/AMDILInstrInfo.td208
-rw-r--r--lib/Target/R600/AMDILIntrinsicInfo.cpp79
-rw-r--r--lib/Target/R600/AMDILIntrinsicInfo.h49
-rw-r--r--lib/Target/R600/AMDILIntrinsics.td242
-rw-r--r--lib/Target/R600/AMDILNIDevice.cpp65
-rw-r--r--lib/Target/R600/AMDILNIDevice.h57
-rw-r--r--lib/Target/R600/AMDILPeepholeOptimizer.cpp1215
-rw-r--r--lib/Target/R600/AMDILRegisterInfo.td107
-rw-r--r--lib/Target/R600/AMDILSIDevice.cpp45
-rw-r--r--lib/Target/R600/AMDILSIDevice.h39
-rw-r--r--lib/Target/R600/CMakeLists.txt55
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp132
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h52
-rw-r--r--lib/Target/R600/InstPrinter/CMakeLists.txt7
-rw-r--r--lib/Target/R600/InstPrinter/LLVMBuild.txt24
-rw-r--r--lib/Target/R600/InstPrinter/Makefile15
-rw-r--r--lib/Target/R600/LLVMBuild.txt32
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp90
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp85
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h30
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h60
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp113
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h55
-rw-r--r--lib/Target/R600/MCTargetDesc/CMakeLists.txt10
-rw-r--r--lib/Target/R600/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/R600/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp574
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp298
-rw-r--r--lib/Target/R600/Makefile23
-rw-r--r--lib/Target/R600/Processors.td29
-rw-r--r--lib/Target/R600/R600Defines.h79
-rw-r--r--lib/Target/R600/R600ExpandSpecialInstrs.cpp334
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp909
-rw-r--r--lib/Target/R600/R600ISelLowering.h72
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp666
-rw-r--r--lib/Target/R600/R600InstrInfo.h168
-rw-r--r--lib/Target/R600/R600Instructions.td1724
-rw-r--r--lib/Target/R600/R600Intrinsics.td32
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.cpp34
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h39
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp89
-rw-r--r--lib/Target/R600/R600RegisterInfo.h55
-rw-r--r--lib/Target/R600/R600RegisterInfo.td107
-rw-r--r--lib/Target/R600/R600Schedule.td36
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp329
-rw-r--r--lib/Target/R600/SIAssignInterpRegs.cpp152
-rw-r--r--lib/Target/R600/SIISelLowering.cpp511
-rw-r--r--lib/Target/R600/SIISelLowering.h62
-rw-r--r--lib/Target/R600/SIInstrFormats.td146
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp89
-rw-r--r--lib/Target/R600/SIInstrInfo.h62
-rw-r--r--lib/Target/R600/SIInstrInfo.td589
-rw-r--r--lib/Target/R600/SIInstructions.td1351
-rw-r--r--lib/Target/R600/SIIntrinsics.td52
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp331
-rw-r--r--lib/Target/R600/SILowerLiteralConstants.cpp108
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.cpp20
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.h34
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp48
-rw-r--r--lib/Target/R600/SIRegisterInfo.h47
-rw-r--r--lib/Target/R600/SIRegisterInfo.td167
-rw-r--r--lib/Target/R600/SISchedule.td15
-rw-r--r--lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp26
-rw-r--r--lib/Target/R600/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/R600/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/R600/TargetInfo/Makefile15
-rw-r--r--lib/Target/README.txt15
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp4
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp6
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h11
-rw-r--r--lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--lib/Target/Target.cpp5
-rw-r--r--lib/Target/TargetIntrinsicInfo.cpp2
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp10
-rw-r--r--lib/Target/TargetMachine.cpp6
-rw-r--r--lib/Target/TargetMachineC.cpp4
-rw-r--r--lib/Target/TargetTransformImpl.cpp372
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/X86/AsmParser/X86AsmLexer.cpp159
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp101
-rw-r--r--lib/Target/X86/CMakeLists.txt3
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp6
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp10
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp19
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp12
-rw-r--r--lib/Target/X86/Makefile3
-rw-r--r--lib/Target/X86/README.txt37
-rw-r--r--lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--lib/Target/X86/X86.h7
-rw-r--r--lib/Target/X86/X86.td12
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp24
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp44
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp2
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp29
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp19
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp1312
-rw-r--r--lib/Target/X86/X86ISelLowering.h70
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td342
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td12
-rw-r--r--lib/Target/X86/X86InstrCompiler.td258
-rw-r--r--lib/Target/X86/X86InstrFMA.td18
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td6
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp711
-rw-r--r--lib/Target/X86/X86InstrInfo.td144
-rw-r--r--lib/Target/X86/X86InstrSSE.td1589
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td6
-rw-r--r--lib/Target/X86/X86JITInfo.cpp2
-rw-r--r--lib/Target/X86/X86JITInfo.h2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp2
-rw-r--r--lib/Target/X86/X86NaClRewriteFinalPass.cpp2
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp212
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp17
-rw-r--r--lib/Target/X86/X86Schedule.td5
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td1
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp2
-rw-r--r--lib/Target/X86/X86Subtarget.cpp3
-rw-r--r--lib/Target/X86/X86Subtarget.h7
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp29
-rw-r--r--lib/Target/X86/X86TargetMachine.h26
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp6
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp383
-rw-r--r--lib/Target/XCore/CMakeLists.txt4
-rw-r--r--lib/Target/XCore/Disassembler/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/Disassembler/Makefile16
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp324
-rw-r--r--lib/Target/XCore/InstPrinter/CMakeLists.txt7
-rw-r--r--lib/Target/XCore/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/InstPrinter/Makefile16
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp97
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h44
-rw-r--r--lib/Target/XCore/LLVMBuild.txt3
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp14
-rw-r--r--lib/Target/XCore/Makefile6
-rw-r--r--lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--lib/Target/XCore/XCore.td6
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp66
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp11
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp12
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td127
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td355
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.cpp117
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.h42
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp4
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td6
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp4
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h11
382 files changed, 29021 insertions, 6011 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 0ac92f1ee8..0ad4183679 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -51,6 +51,10 @@ FunctionPass *createThumb2SizeReductionPass();
FunctionPass *createARMNaClRewritePass();
/* @LOCALMOD-END */
+/// \brief Creates an ARM-specific Target Transformation Info pass.
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 5ea251a795..a76715a092 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
@@ -152,6 +156,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
FeatureHasSlowFPVMLx]>;
// FIXME: It has not been determined if A15 has these features.
@@ -159,6 +164,12 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -243,6 +254,11 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
+// FIXME: R5 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 3d59374aee..737a498dcd 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -29,9 +29,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -41,7 +43,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Type.h"
#include <cctype>
using namespace llvm;
@@ -189,7 +189,7 @@ namespace {
const size_t TagHeaderSize = 1 + 4;
Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitBytes(CurrentVendor);
Streamer.EmitIntValue(0, 1); // '\0'
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
@@ -199,14 +199,14 @@ namespace {
// emit each field as its type (ULEB or String)
for (unsigned int i=0; i<Contents.size(); ++i) {
AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag, 0);
+ Streamer.EmitULEB128IntValue(item.Tag);
switch (item.Type) {
default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper(), 0);
+ Streamer.EmitBytes(item.StringValue.upper());
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
@@ -761,7 +761,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -771,7 +771,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -789,7 +789,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2e90866f27..b6c8d108ee 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -27,9 +27,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -464,8 +464,9 @@ PredicateInstruction(MachineInstr *MI,
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
- MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
- MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
return true;
}
@@ -1154,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
@@ -1165,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MachineInstrBuilder(MI));
+ AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI->getOperand(1).setIsUndef();
- MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+ MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
@@ -1716,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// same register as operand 0.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
- NewMI->addOperand(FalseReg);
+ NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// The caller will erase MI, but not DefMI.
@@ -3329,8 +3331,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ if (MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
--Latency;
}
return Latency;
@@ -3821,7 +3824,7 @@ void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
- MachineInstrBuilder MIB(MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
switch (MI->getOpcode()) {
default:
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e814fda1f8..72e23aafff 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -28,11 +28,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,19 +44,9 @@
using namespace llvm;
-static cl::opt<bool>
-ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
- cl::desc("Force use of virtual base registers for stack load/store"));
-static cl::opt<bool>
-EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
- cl::desc("Enable pre-regalloc stack frame index allocation"));
-static cl::opt<bool>
-EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -284,9 +273,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!EnableBasePointer)
- return false;
-
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
@@ -332,8 +318,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// pointer adjustments around calls.
if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
- if (!EnableBasePointer)
- return false;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
return MRI->canReserveReg(BasePtr);
@@ -346,7 +330,8 @@ needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -423,7 +408,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
requiresVirtualBaseRegisters(const MachineFunction &MF) const {
- return EnableLocalStackAlloc;
+ return true;
}
static void
@@ -562,8 +547,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
case ARM::VLDRS: case ARM::VLDRD:
case ARM::VSTRS: case ARM::VSTRD:
case ARM::tSTRspi: case ARM::tLDRspi:
- if (ForceAllBaseRegAlloc)
- return true;
break;
default:
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 0bd1c3ee2f..e6e8c3d5fa 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -18,8 +18,8 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 4b6a768e89..5e8e1739a9 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -28,9 +28,9 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -371,12 +371,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
}
bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
+
+ assert((Target.getRelocationModel() != Reloc::Default ||
+ Target.getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
- II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
- TD = MF.getTarget().getDataLayout();
+
+ JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
+ II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
+ TD = Target.getDataLayout();
+
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 54ccbdddaa..57c2cce36c 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 1820b323f8..4e703ec3c1 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -14,11 +14,11 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Type.h"
#include <cstdlib>
using namespace llvm;
@@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
bool AddCurrentAddress)
: ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
AddCurrentAddress),
- S(strdup(s)) {}
-
-ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
- free((void*)S);
-}
+ S(s) {}
ARMConstantPoolSymbol *
ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
@@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
-}
-
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
if (!APS) continue;
- if (CPV_streq(APS->S, S) && equals(APS))
+ if (APS->S == S && equals(APS))
return i;
}
}
@@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
- return ACPS && CPV_streq(ACPS->S, S) &&
- ARMConstantPoolValue::hasSameValue(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
}
void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(S);
+ ID.AddString(S);
ARMConstantPoolValue::addSelectionDAGCSEId(ID);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 24f2fcb666..b6e0fe7c7c 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -164,19 +164,17 @@ public:
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
/// symbols.
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
- const char *S; // ExtSymbol being loaded.
+ const std::string S; // ExtSymbol being loaded.
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- ~ARMConstantPoolSymbol();
-
static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
unsigned ID, unsigned char PCAdj);
- const char *getSymbol() const { return S; }
+ const char *getSymbol() const { return S.c_str(); }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9cacf1b000..e072a2cb85 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -20,7 +20,6 @@
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -30,13 +29,14 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,24 +178,24 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
- unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
- unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
- unsigned ARMMaterializeInt(const Constant *C, EVT VT);
- unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
- unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
- unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
- unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
// Call handling routines.
private:
@@ -221,7 +221,7 @@ class ARMFastISel : public FastISel {
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
- void AddLoadStoreOperands(EVT VT, Address &Addr,
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3);
};
@@ -487,7 +487,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
-unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::f64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -497,7 +497,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
return MoveReg;
}
-unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::i64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -510,7 +510,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
-unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
const APFloat Val = CFP->getValueAPF();
bool is64bit = VT == MVT::f64;
@@ -554,7 +554,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return false;
@@ -616,7 +616,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32) return 0;
@@ -724,10 +724,11 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
// This assert should help detect some regressions early.
assert(!FlagSfiDisableCP && "unexpected call to TargetMaterializeConstant");
// @LOCALMOD-END
- EVT VT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
- if (!VT.isSimple()) return 0;
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return ARMMaterializeFP(CFP, VT);
@@ -903,12 +904,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
-
- assert(VT.isSimple() && "Non-simple types are invalid here!");
-
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
bool needsLowering = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
@@ -959,13 +957,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
}
}
-void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
- if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
Addr.Offset /= 4;
// Frame base works a bit differently. Handle it separately.
@@ -1008,14 +1005,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment, bool isZExt, bool allocReg) {
- assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
const TargetRegisterClass *RC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1:
@@ -1132,11 +1128,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
@@ -1410,8 +1406,9 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcVT = TLI.getValueType(Ty, true);
- if (!SrcVT.isSimple()) return false;
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
@@ -1448,7 +1445,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned CmpOpc;
bool isICmp = true;
bool needsExt = false;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
@@ -1600,7 +1597,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -1609,8 +1609,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
- EVT DestVT = MVT::i32;
- SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
/*isZExt*/!isSigned);
if (SrcReg == 0) return false;
}
@@ -1816,7 +1815,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT VT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
// We can get here in the case when we want to use NEON for our fp
// operations, but can't figure out how to. Just use the vfp instructions
@@ -1847,7 +1848,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
unsigned Op2 = getRegForValue(I->getOperand(1));
if (Op2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
@@ -2060,7 +2061,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
// For this move we copy into two registers and then move into the
// double fp reg we want.
- EVT DestVT = RVLocs[0].getValVT();
+ MVT DestVT = RVLocs[0].getValVT();
const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2075,7 +2076,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UpdateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
- EVT CopyVT = RVLocs[0].getValVT();
+ MVT CopyVT = RVLocs[0].getValVT();
// Special handling for extended integers.
if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
@@ -2106,8 +2107,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2134,8 +2134,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVVT = TLI.getValueType(RV->getType());
- EVT DestVT = VA.getValVT();
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
@@ -2180,7 +2182,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
GlobalValue::ExternalLinkage, 0, Name);
- return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
+ EVT LCREVT = TLI.getValueType(GV->getType());
+ if (!LCREVT.isSimple()) return 0;
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2340,16 +2344,16 @@ bool ARMFastISel::SelectCall(const Instruction *I,
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attributes::InReg) ||
- CS.paramHasAttr(AttrInd, Attributes::StructRet) ||
- CS.paramHasAttr(AttrInd, Attributes::Nest) ||
- CS.paramHasAttr(AttrInd, Attributes::ByVal))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2592,7 +2596,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
@@ -2600,8 +2604,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
unsigned Opc;
bool isBoolZext = false;
const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
- if (!SrcVT.isSimple()) return 0;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
if (!Subtarget->hasV6Ops()) return 0;
@@ -2648,14 +2651,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
-
bool isZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
@@ -2835,7 +2842,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
- unsigned Align, EVT VT) {
+ unsigned Align, MVT VT) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolConstant *CPV =
ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index e335141d24..0f162e9514 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -16,14 +16,13 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Function.h"
-#include "llvm/Function.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
// @LOCALMOD-START
@@ -779,7 +778,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet) {
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
MI = MIB;
@@ -1236,7 +1235,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 00214663c7..f4e77f3255 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -16,17 +16,17 @@
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -87,6 +87,8 @@ public:
return "ARM Instruction Selection";
}
+ virtual void PreprocessISelDAG();
+
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
@@ -339,6 +341,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale,
return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+ // of trailing zeros of c2. The left shift would be folded as an shifter
+ // operand of 'add' and the 'and' and 'srl' would become a bits extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = CountTrailingZeros_32(And_imm);
+ if (TZ != 1 && TZ != 2)
+ // Be conservative here. Shifter operands aren't always free. e.g. On
+ // Swift, left shifter operand of 1 / 2 for free but others are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1, CurDAG->getConstant(TZ, MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoidded.
@@ -2218,10 +2301,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
if (!Subtarget->hasV6T2Ops())
return NULL;
- unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
-
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
if (N->getOpcode() == ISD::AND) {
@@ -2239,7 +2322,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// Note: The width operand is encoded as width-1.
unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
+
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ }
+
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 45d52b62ac..0e15313b9c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -25,7 +25,6 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -35,19 +34,20 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
// @LOCALMOD-START
namespace llvm {
@@ -904,10 +904,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
// Use DPR as representative register class for all floating point
@@ -1091,7 +1091,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -1661,8 +1661,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -6899,7 +6899,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DefRegs[OI->getReg()] = true;
}
- MachineInstrBuilder MIB(&*II);
+ MachineInstrBuilder MIB(*MF, &*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
@@ -6976,8 +6976,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat) &&
+ if (!MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -9741,33 +9742,33 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if (IsZeroVal &&
+ if ((!IsMemset || ZeroMemset) &&
Subtarget->hasNEON() &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
bool Fast;
- if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) &&
- Fast))) {
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
return MVT::v2f64;
- } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) &&
- Fast))) {
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
return MVT::f64;
}
}
// Lowering to i32/i16 if the size permits.
- if (Size >= 4) {
+ if (Size >= 4)
return MVT::i32;
- } else if (Size >= 2) {
+ else if (Size >= 2)
return MVT::i16;
- }
// Let the target-independent logic figure it out.
return MVT::Other;
@@ -10570,24 +10571,6 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
}
-bool ARMTargetLowering::isIntImmLegal(const APInt &Imm, EVT VT) const {
- if (VT.getSizeInBits() > 32)
- return false;
-
- int32_t ImmVal = Imm.getSExtValue();
- if (!Subtarget->isThumb()) {
- return (ImmVal >= 0 && ImmVal < 65536) ||
- (ARM_AM::getSOImmVal(ImmVal) != -1) ||
- (ARM_AM::getSOImmVal(~ImmVal) != -1);
- } else if (Subtarget->isThumb2()) {
- return (ImmVal >= 0 && ImmVal < 65536) ||
- (ARM_AM::getT2SOImmVal(ImmVal) != -1) ||
- (ARM_AM::getT2SOImmVal(~ImmVal) != -1);
- } else /*Thumb1*/ {
- return (ImmVal >= 0 && ImmVal < 256);
- }
-}
-
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
@@ -10669,3 +10652,4 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
+
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index c7ee13798f..e099d38200 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -294,7 +294,7 @@ namespace llvm {
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
@@ -369,7 +369,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -390,14 +390,12 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
- virtual bool isIntImmLegal(const APInt &Imm, EVT VT) const;
-
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index a0b6f249a2..80f0ec7437 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index caa5135bfc..351a290e2a 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -18,7 +18,7 @@
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8949b50f67..2ae927e1ee 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -31,9 +31,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -1418,7 +1418,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(&*MBBI);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
MBB.erase(MBBI);
return true;
}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 1bd2a93a8b..5a65efe3b5 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -16,7 +16,7 @@
#include "ARMAsmPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp
index d26a35848d..317b84a5da 100644
--- a/lib/Target/ARM/ARMNaClRewritePass.cpp
+++ b/lib/Target/ARM/ARMNaClRewritePass.cpp
@@ -27,7 +27,7 @@
#include "ARMNaClRewritePass.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 404634fee9..4191931a5a 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 72be065d64..0056562719 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -14,7 +14,7 @@
#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index f5ea64d42b..0da446aa02 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -14,7 +14,7 @@
#include "ARMSubtarget.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -84,6 +84,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasDataBarrier(false)
, Pref32BitThumb(false)
, AvoidCPSRPartialUpdate(false)
+ , AvoidMOVsShifterOperand(false)
, HasRAS(false)
, HasMPExtension(false)
, FPOnlySP(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 28975f9243..c0e834c648 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -39,7 +39,7 @@ class StringRef;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA8, CortexA9, CortexA15, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -145,6 +145,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+ /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
+ /// movs with shifter operand (i.e. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
/// HasRAS - Some processors perform return stack prediction. CodeGen should
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRAS;
@@ -225,6 +229,7 @@ protected:
bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool hasARMOps() const { return !NoARM; }
@@ -246,6 +251,7 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 533be838d8..c02e981e54 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -58,6 +58,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our ARM pass. This
+ // allows the ARM pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createARMTargetTransformInfoPass(this));
+}
+
+
void ARMTargetMachine::anchor() { }
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
@@ -77,8 +86,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"v128:64:128-v64:64:64-n32-S32")),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -110,8 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
namespace {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 2d265afa1b..e6ad5979f9 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -24,10 +24,9 @@
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
// @LOCALMOD-START
#include "llvm/Support/CommandLine.h"
@@ -58,6 +57,9 @@ public:
return &InstrItins;
}
+ /// \brief Register ARM analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -73,8 +75,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -96,12 +96,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
@@ -119,8 +113,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -149,12 +141,6 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 0000000000..404a6fff11
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,159 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+ const ARMBaseTargetMachine *TM;
+ const ARMSubtarget *ST;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ ARMTTI(const ARMBaseTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) {
+ initializeARMTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 16;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
+ "ARM Target Transform Info", true, true, false)
+char ARMTTI::ID = 0;
+
+ImmutablePass *
+llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
+ return new ARMTTI(TM);
+}
+
+
+unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else /*Thumb1*/ {
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+ }
+ return 2;
+}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
deleted file mode 100644
index 3ada0f2af6..0000000000
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <map>
-#include <string>
-
-using namespace llvm;
-
-namespace {
-
-class ARMBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
-protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
-public:
- ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
-};
-
-class ARMAsmLexer : public ARMBaseAsmLexer {
-public:
- ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-class ThumbAsmLexer : public ARMBaseAsmLexer {
-public:
- ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-} // end anonymous namespace
-
-AsmToken ARMBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default: break;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- break;
- case AsmToken::Identifier: {
- std::string lowerCase = lexedToken.getString().lower();
-
- unsigned regID = MatchRegisterName(lowerCase);
- // Check for register aliases.
- // r13 -> sp
- // r14 -> lr
- // r15 -> pc
- // ip -> r12
- // FIXME: Some assemblers support lots of others. Do we want them all?
- if (!regID) {
- regID = StringSwitch<unsigned>(lowerCase)
- .Case("r13", ARM::SP)
- .Case("r14", ARM::LR)
- .Case("r15", ARM::PC)
- .Case("ip", ARM::R12)
- .Default(0);
- }
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- }
- }
-
- return AsmToken(lexedToken);
-}
-
-extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
-}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4685b1d193..e9bdc4ad9b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -178,7 +178,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
// Asm Match Converter Methods
void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
@@ -270,7 +271,7 @@ public:
namespace {
/// ARMOperand - Instances of this class represent a parsed ARM machine
-/// instruction.
+/// operand.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
k_CondCode,
@@ -2450,8 +2451,8 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
RegNo = tryParseRegister();
- EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2540,6 +2541,8 @@ int ARMAsmParser::tryParseShiftRegister(
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
int64_t Imm = 0;
int ShiftReg = 0;
if (ShiftTy == ARM_AM::rrx) {
@@ -2554,7 +2557,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
- if (getParser().ParseExpression(ShiftExpr)) {
+ if (getParser().ParseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
}
@@ -2579,8 +2582,9 @@ int ARMAsmParser::tryParseShiftRegister(
if (Imm == 0)
ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
return -1;
@@ -2595,10 +2599,10 @@ int ARMAsmParser::tryParseShiftRegister(
if (ShiftReg && ShiftTy != ARM_AM::rrx)
Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
ShiftReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
else
Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
return 0;
}
@@ -2612,12 +2616,13 @@ int ARMAsmParser::tryParseShiftRegister(
/// parse for a specific register type.
bool ARMAsmParser::
tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
- Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
@@ -2641,10 +2646,10 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!MCE)
return TokError("immediate value expected for vector index");
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
@@ -2794,7 +2799,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Check for and consume the closing '}'
if (Parser.getTok().isNot(AsmToken::RCurly))
return MatchOperand_ParseFail;
- SMLoc E = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the '}'
Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
@@ -2891,10 +2896,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
- return Error(EndLoc, "register expected");
+ return Error(AfterMinusLoc, "register expected");
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -2904,10 +2909,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!RC->contains(EndReg))
- return Error(EndLoc, "invalid register in register list");
+ return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
- return Error(EndLoc, "bad range in register list");
+ return Error(AfterMinusLoc, "bad range in register list");
// Add all the registers in the range to the register list.
while (Reg != EndReg) {
@@ -2955,9 +2960,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly))
- return Error(E, "'}' expected");
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
// Push the register list operand.
@@ -2974,13 +2979,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
if (Parser.getTok().is(AsmToken::RBrac)) {
// "Dn[]" is the 'all lanes' syntax.
LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
@@ -3005,6 +3011,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Error(Parser.getTok().getLoc(), "']' expected");
return MatchOperand_ParseFail;
}
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
int64_t Val = CE->getValue();
@@ -3031,21 +3038,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// (without encosing curly braces) as a single or double entry list,
// respectively.
if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
return MatchOperand_NoMatch;
- SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
S, E));
break;
@@ -3059,18 +3064,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
@@ -3111,7 +3114,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
++Reg;
++Count;
}
- if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
@@ -3125,10 +3130,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1) {
- Error(EndLoc, "register expected");
+ Error(AfterMinusLoc, "register expected");
return MatchOperand_ParseFail;
}
// Allow Q regs and just interpret them as the two D sub-registers.
@@ -3140,24 +3145,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
- Error(EndLoc, "invalid register in register list");
+ Error(AfterMinusLoc, "invalid register in register list");
return MatchOperand_ParseFail;
}
// Ranges must go from low to high.
if (Reg > EndReg) {
- Error(EndLoc, "bad range in register list");
+ Error(AfterMinusLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(AfterMinusLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
- EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -3196,11 +3201,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(LaneLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
continue;
@@ -3221,7 +3227,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
Error(EndLoc, "mismatched lane index in register list");
@@ -3229,11 +3235,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(E, "'}' expected");
+ Error(Parser.getTok().getLoc(), "'}' expected");
return MatchOperand_ParseFail;
}
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
switch (LaneKind) {
@@ -3525,7 +3531,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3540,7 +3547,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
return MatchOperand_Success;
}
@@ -3550,7 +3557,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
int Val = StringSwitch<int>(Tok.getString())
@@ -3560,12 +3567,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the token.
if (Val == -1) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
getContext()),
- S, Parser.getTok().getLoc()));
+ S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -3601,16 +3608,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed shift expression");
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "shift amount must be an immediate");
+ Error(ExLoc, "shift amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3618,25 +3626,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isASR) {
// Shift amount must be in [1,32]
if (Val < 1 || Val > 32) {
- Error(E, "'asr' shift amount must be in range [1,32]");
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
return MatchOperand_ParseFail;
}
// asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
if (isThumb() && Val == 32) {
- Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
return MatchOperand_ParseFail;
}
if (Val == 32) Val = 0;
} else {
// Shift amount must be in [1,32]
if (Val < 0 || Val > 31) {
- Error(E, "'lsr' shift amount must be in range [0,31]");
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
return MatchOperand_ParseFail;
}
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3662,16 +3669,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed rotate expression");
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "rotate amount must be an immediate");
+ Error(ExLoc, "rotate amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3680,12 +3688,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// normally, zero is represented in asm by omitting the rotate operand
// entirely.
if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
- Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3735,7 +3742,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat hash token.
const MCExpr *WidthExpr;
- if (getParser().ParseExpression(WidthExpr)) {
+ SMLoc EndLoc;
+ if (getParser().ParseExpression(WidthExpr, EndLoc)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3751,9 +3759,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(E, "'width' operand must be in the range [1,32-lsb]");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
return MatchOperand_Success;
}
@@ -3772,7 +3779,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3781,15 +3787,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
Error(Parser.getTok().getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
unsigned ShiftImm = 0;
@@ -3797,6 +3803,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the ','.
if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
}
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
@@ -3829,14 +3838,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ SMLoc E;
+ if (getParser().ParseExpression(Offset, E))
return MatchOperand_ParseFail;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
if (!CE) {
Error(S, "constant expression expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Tok.getLoc();
// Negative zero is encoded as the flag value INT32_MIN.
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
@@ -3851,7 +3860,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3860,18 +3868,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
- Error(Parser.getTok().getLoc(), "register expected");
+ Error(Tok.getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
- 0, S, E));
+ 0, S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -4224,7 +4232,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(Tok.getLoc(), "malformed memory operand");
if (Tok.is(AsmToken::RBrac)) {
- E = Tok.getLoc();
+ E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
@@ -4272,9 +4280,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4321,9 +4329,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
CE = MCConstantExpr::Create(INT32_MIN, getContext());
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4367,9 +4375,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
@@ -5723,7 +5731,12 @@ processInstruction(MCInst &Inst,
}
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
- Inst.setOpcode(ARM::t2LDRpci);
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff)
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
return true;
case ARM::t2LDRBpcrel:
Inst.setOpcode(ARM::t2LDRBpci);
@@ -7636,7 +7649,7 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getParser().ParseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -7824,13 +7837,10 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return true;
}
-extern "C" void LLVMInitializeARMAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
- LLVMInitializeARMAsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index e24a1b1786..d2012c387c 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmParser
- ARMAsmLexer.cpp
ARMAsmParser.cpp
)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 1ea4e00867..5f426d270a 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -11,7 +11,6 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
@@ -40,6 +39,7 @@ add_llvm_target(ARMCodeGen
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
+ ARMTargetTransformInfo.cpp
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 65c8c1a561..31a3b0b524 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -13,7 +13,6 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -105,10 +104,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-private:
};
/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
@@ -131,8 +126,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
return new ARMDisassembler(STI);
@@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-const EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
-const EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
const MemoryObject &Region,
uint64_t Address,
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 0000000000..68afea12ed
--- /dev/null
+++ b/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 45fef17785..41594dc578 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -123,7 +123,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -166,7 +166,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
@@ -221,10 +221,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- // @LOCALMOD-BEGIN
- // The upstream operation for this encoding is not what it says it is.
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
- // @LOCALMOD-END
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
@@ -635,30 +632,11 @@ public:
uint8_t _OSABI)
: ARMAsmBackend(T, TT), OSABI(_OSABI) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMELFObjectWriter(OS, OSABI);
}
};
-// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
- unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup, Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
// @LOCALMOD-BEGIN
class NaClARMAsmBackend : public ELFARMAsmBackend {
public:
@@ -726,7 +704,7 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
// @LOCALMOD-END
- if (TheTriple.isOSNaCl())
+ if (TheTriple.isOSNaCl())
return new NaClARMAsmBackend(T, TT, OSABI);
// @LOCALMOD-END
return new ELFARMAsmBackend(T, TT, OSABI);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index d291304e83..9193e40bed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -71,11 +71,10 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
const MCFixup &Fixup,
bool IsPCRel) const {
const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
- const MCSymbol &ASymbol = Symbol.AliasedSymbol();
bool EmitThisSym = false;
const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(ASymbol.getSection());
+ static_cast<const MCSectionELF&>(Symbol.getSection());
bool InNormalSection = true;
unsigned RelocType = 0;
RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
@@ -134,13 +133,14 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
switch (RelocType) {
default: EmitThisSym = true; break;
case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ case ELF::R_ARM_PREL31: EmitThisSym = false; break;
}
}
if (EmitThisSym)
- return &ASymbol;
+ return &Symbol;
if (! Symbol.isTemporary() && InNormalSection) {
- return &ASymbol;
+ return &Symbol;
}
return NULL;
}
@@ -226,6 +226,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
case MCSymbolRefExpr::VK_ARM_GOT:
Type = ELF::R_ARM_GOT_BREL;
break;
@@ -250,7 +253,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TARGET2:
Type = ELF::R_ARM_TARGET2;
break;
- }
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ }
break;
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 0000000000..39ded8fb57
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,201 @@
+//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. Different
+// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to
+// delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~ARMELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is provided as the
+ // default constructor by DenseMap::lookup.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ }
+
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+
+}
+
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
new file mode 100644
index 0000000000..77ae5d2362
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_STREAMER_H
+#define ARM_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb);
+}
+
+#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
index af2b04086a..cd25c865d9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
@@ -77,7 +77,7 @@ static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
int64_t Pred = Saved[2].getOperand(2).getImm();
assert((ARM::SP == Addr) && "Unexpected register at stack guard");
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
Out.EmitInstruction(Saved[1]);
EmitBICMask(Out, Addr, Pred, 0xC0000000);
Out.EmitBundleUnlock();
@@ -89,8 +89,7 @@ static void EmitDirectGuardCall(int I, MCInst Saved[],
// sfi_nops_to_force_slot3
assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
"Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
}
@@ -104,8 +103,7 @@ static void EmitIndirectGuardCall(int I, MCInst Saved[],
"Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
unsigned Reg = Saved[0].getOperand(0).getReg();
int64_t Pred = Saved[0].getOperand(2).getImm();
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
EmitBICMask(Out, Reg, Pred, 0xC000000F);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -120,7 +118,7 @@ static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Reg = Saved[0].getOperand(0).getReg();
int64_t Pred = Saved[0].getOperand(2).getImm();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitBICMask(Out, Reg, Pred, 0xC000000F);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -134,7 +132,7 @@ static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
"Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
int64_t Pred = Saved[0].getOperand(0).getImm();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitBICMask(Out, ARM::LR, Pred, 0xC000000F);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -149,7 +147,7 @@ static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Reg = Saved[0].getOperand(0).getReg();
int64_t Pred = Saved[0].getOperand(2).getImm();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitBICMask(Out, Reg, Pred, 0xC0000000);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -163,7 +161,7 @@ static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) {
"Unexpected SFI Pseudo while lowering");
unsigned Reg = Saved[0].getOperand(0).getReg();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitTST(Out, Reg);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -182,7 +180,7 @@ static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) {
int64_t Pred = Saved[3].getOperand(2).getImm();
assert((ARM::SP == SpReg) && "Unexpected register at stack guard");
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitBICMask(Out, AddrReg, Pred, 0xC0000000);
Out.EmitInstruction(Saved[2]);
EmitBICMask(Out, SpReg, Pred, 0xC0000000);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index e7d3e7cb0b..00b3b223fd 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,7 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMMCTargetDesc.h"
+#include "ARMELFStreamer.h"
+#include "ARMMCAsmInfo.h"
#include "ARMBaseInfo.h"
+#include "ARMELFStreamer.h"
#include "ARMMCAsmInfo.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
@@ -144,7 +147,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X, ARM::LR);
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
}
@@ -196,8 +199,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
}
-
- return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
+ // @LOCALMOD-BEGIN
+ MCStreamer *Streamer = createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ NoExecStack, TheTriple.getArch() == Triple::thumb);
+ if (TheTriple.isOSNaCl())
+ Streamer->EmitBundleAlignMode(4);
+ return Streamer;
+ // @LOCALMOD-END
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 3ee853c822..e9f7682d6f 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
+ ARMELFStreamer.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 3e48ed1189..f069535ff3 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 500e3de82d..fa5681fb12 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index edd73c20c0..123ada67e1 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -281,7 +281,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
AddDefaultPred(MIB);
- MIB->copyImplicitOps(&*MBBI);
+ MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -352,7 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 957a34d11d..57cc7d8604 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -24,10 +24,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -390,6 +390,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -417,7 +418,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset
MI.RemoveOperand(FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
return true;
}
@@ -428,7 +428,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
@@ -457,7 +456,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -603,6 +601,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
while (!MI.getOperand(i).isFI()) {
++i;
@@ -719,8 +718,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.isPredicable()) {
- MachineInstrBuilder MIB(&MI);
+ if (MI.isPredicable())
AddDefaultPred(MIB);
- }
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 9fba8227f6..67e8ec7c5f 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Remove offset and remaining explicit predicate operands.
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
AddDefaultPred(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 9c018bc37a..1a7a4d450c 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -19,9 +19,9 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index d1cde1bf98..567bc05272 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -52,81 +53,82 @@ namespace {
unsigned PredCC2 : 2;
unsigned PartFlag : 1; // 16-bit instruction does partial flag update
unsigned Special : 1; // Needs to be dealt with specially
+ unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};
static const ReduceEntry ReduceTable[] = {
- // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
- { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
- { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
- { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
- { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
- //FIXME: Disable CMN, as CCodes are backwards from compare expectations
- //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
- { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: adr.n immediate offset must be multiple of 4.
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
- // likely to cause issue in the loop. As a size / performance workaround,
- // they are not marked as such.
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
- // FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
- { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
- { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
- { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
-
- // FIXME: Clean this up after splitting each Thumb load / store opcode
- // into multiple ones.
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
-
- { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+ //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+ // likely to cause issue in the loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
class Thumb2SizeReduce : public MachineFunctionPass {
@@ -175,13 +177,22 @@ namespace {
bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop);
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
+
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
+
+ bool OptimizeSize;
+ bool MinimizeSize;
};
char Thumb2SizeReduce::ID = 0;
}
Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -216,8 +227,8 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
bool FirstInSelfLoop) {
- // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
- if (!STI->avoidCPSRPartialUpdate())
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
return false;
if (!Def)
@@ -578,7 +589,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// are prioritized, but the table assumes a unique entry for each
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
- { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
@@ -596,6 +607,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
@@ -708,6 +725,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Limit = ~0U;
if (Entry.Imm1Limit)
Limit = (1 << Entry.Imm1Limit) - 1;
@@ -841,6 +864,32 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
return LiveCPSR;
}
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -865,40 +914,20 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
- if (OPI != ReduceOpcodeMap.end()) {
- const ReduceEntry &Entry = ReduceTable[OPI->second];
- // Ignore "special" cases for now.
- if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit two-address instruction.
- if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
}
- ProcessNext:
- if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ if (!NextInSameBundle && MI->isInsideBundle()) {
// FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
// marker is only on the BUNDLE instruction. Process the BUNDLE
// instruction as we finish with the bundled instruction to work around
@@ -931,6 +960,13 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
+ // Optimizing / minimizing size?
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
bool Modified = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Modified |= ReduceMBB(*I);
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index c87a33a8b9..02ac493b42 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -8,7 +8,6 @@ add_llvm_library(LLVMTarget
TargetMachine.cpp
TargetMachineC.cpp
TargetSubtargetInfo.cpp
- TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 788c6e6866..f468861434 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -15,17 +15,17 @@
#include "CPPTargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CallingConv.h"
#include "llvm/Config/config.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
@@ -479,9 +479,9 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
Out << " {\n AttrBuilder B;\n";
#define HANDLE_ATTR(X) \
- if (attrs.hasAttribute(Attributes::X)) \
- Out << " B.addAttribute(Attributes::" #X ");\n"; \
- attrs.removeAttribute(Attributes::X);
+ if (attrs.contains(Attribute::X)) \
+ Out << " B.addAttribute(Attribute::" #X ");\n"; \
+ attrs.removeAttribute(Attribute::X);
HANDLE_ATTR(SExt);
HANDLE_ATTR(ZExt);
@@ -509,11 +509,11 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(NonLazyBind);
HANDLE_ATTR(MinSize);
#undef HANDLE_ATTR
- if (attrs.hasAttribute(Attributes::StackAlignment))
+ if (attrs.contains(Attribute::StackAlignment))
Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n";
- attrs.removeAttribute(Attributes::StackAlignment);
+ attrs.removeAttribute(Attribute::StackAlignment);
assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }";
+ Out << " PAWI.Attrs = Attribute::get(mod->getContext(), B);\n }";
nl(Out);
Out << "Attrs.push_back(PAWI);";
nl(Out);
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index cc6f1fb832..477e788ee2 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -14,7 +14,7 @@
#ifndef CPPTARGETMACHINE_H
#define CPPTARGETMACHINE_H
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index a8ac0a282c..1ca74a4895 100644
--- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "CPPTargetMachine.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index cedb3a8303..58b89d1283 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -31,9 +31,10 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -41,7 +42,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index bc2af63612..bc2af63612 100755..100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 1b4b0206f7..2c93d04f98 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -15,7 +15,7 @@
#include "HexagonCallingConvLower.h"
#include "Hexagon.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 29bfc6b466..9043cf92d2 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -25,14 +25,14 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index d875fea575..2a00a9f7bd 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -39,7 +39,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 275e135cd6..db292f2cd4 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -16,7 +16,7 @@
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 10c54cba9f..16cec5cfe5 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -17,7 +17,6 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,12 +25,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 14254ce742..5a415ebe54 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -16,8 +16,8 @@
#define Hexagon_ISELLOWERING_H
#include "Hexagon.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 49741a3d1b..05f1e23f60 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -59,9 +59,6 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
bits<6> imm6;
}
-class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
- let isCodeGenOnly = 1;
-
- bits<26> imm26;
-}
+let isCodeGenOnly = 1 in
+class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index dc93fb6b1b..8b183b967c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -98,8 +98,8 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
multiclass ALU32_Pbase<string mnemonic, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, $src3)",
@@ -107,10 +107,10 @@ multiclass ALU32_Pbase<string mnemonic, bit isNot,
}
multiclass ALU32_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
}
}
@@ -118,7 +118,7 @@ let InputType = "reg" in
multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in {
let isPredicable = 1 in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = "#mnemonic#"($src1, $src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
@@ -144,8 +144,8 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
// ALU32/ALU (ADD with register-immediate form)
//===----------------------------------------------------------------------===//
multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, #$src3)",
@@ -153,10 +153,10 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
}
multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
}
}
@@ -165,7 +165,7 @@ multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
isPredicable = 1 in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s16Ext:$src2),
"$dst = "#mnemonic#"($src1, #$src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
@@ -222,15 +222,15 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
multiclass TFR_Pred<bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- def _c#NAME# : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
[]>;
// Predicate new
let PNewValue = "new" in
- def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
[]>;
}
@@ -240,7 +240,7 @@ let InputType = "reg", neverHasSideEffects = 1 in
multiclass TFR_base<string CextOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp in {
let isPredicable = 1 in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = $src1",
[]>;
@@ -252,15 +252,15 @@ multiclass TFR_base<string CextOp> {
}
multiclass TFR64_Pred<bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- def _c#NAME# : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
[]>;
// Predicate new
let PNewValue = "new" in
- def _cdn#NAME# : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
+ def _cdn#NAME : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
[]>;
}
@@ -270,7 +270,7 @@ let InputType = "reg", neverHasSideEffects = 1 in
multiclass TFR64_base<string CextOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp in {
let isPredicable = 1 in
- def #NAME# : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ def NAME : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
"$dst = $src1",
[]>;
@@ -283,16 +283,16 @@ multiclass TFR64_base<string CextOp> {
multiclass TFRI_Pred<bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- def _c#NAME# : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Ext:$src2),
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
[]>;
// Predicate new
let PNewValue = "new" in
- def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Ext:$src2),
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
[]>;
}
@@ -303,7 +303,7 @@ multiclass TFRI_base<string CextOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
let opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicable = 1,
isReMaterializable = 1 in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+ def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
"$dst = #$src1",
[(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
@@ -845,19 +845,19 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
// Load -- MEMri operand
multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : LDInst2<(outs RC:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
(ins PredRegs:$src1, MEMri:$addr),
- #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"($addr)",
[]>;
}
multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
}
}
@@ -868,7 +868,7 @@ multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
let CextOpcode = CextOp, BaseOpcode = CextOp in {
let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
isPredicable = 1 in
- def #NAME# : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
+ def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
"$dst = "#mnemonic#"($addr)",
[]>;
@@ -911,20 +911,20 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)),
// Load - Base with Immediate offset addressing mode
multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
- #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"($src2+#$src3)",
[]>;
}
multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
}
}
@@ -936,7 +936,7 @@ multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
isPredicable = 1, AddedComplexity = 20 in
- def #NAME# : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
"$dst = "#mnemonic#"($src1+#$offset)",
[]>;
@@ -990,7 +990,7 @@ def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
(ins globaladdress:$global),
"$dst = memd(#$global)",
@@ -1005,10 +1005,10 @@ def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"($src2++#$offset)",
[],
"$src2 = $dst2">;
@@ -1016,8 +1016,8 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
Operand ImmOp, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
// Predicate new
let Predicates = [HasV4T], validSubTargets = HasV4SubT in
defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
@@ -1029,8 +1029,8 @@ multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
let BaseOpcode = "POST_"#BaseOp in {
let isPredicable = 1 in
- def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, ImmOp:$offset),
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
"$dst = "#mnemonic#"($src1++#$offset)",
[],
"$src1 = $dst2">;
@@ -1057,6 +1057,9 @@ let hasCtrlDep = 1, neverHasSideEffects = 1 in {
PredNewRel;
}
+def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
+
// Load byte any-extend.
def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
(i32 (LDrib ADDRriS11_0:$addr)) >;
@@ -1073,14 +1076,14 @@ def LDrib_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDb_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst = memb(#$global)",
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDub_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst = memub(#$global)",
@@ -1101,20 +1104,21 @@ def LDrih_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDh_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst = memh(#$global)",
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDuh_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst = memuh(#$global)",
[]>,
Requires<[NoV4T]>;
+let AddedComplexity = 10 in
def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (LDriub ADDRriS11_0:$addr))>;
@@ -1138,8 +1142,9 @@ def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
Requires<[NoV4T]>;
// Load predicate.
-let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
-def LDriw_pred : LDInst<(outs PredRegs:$dst),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def LDriw_pred : LDInst2<(outs PredRegs:$dst),
(ins MEMri:$addr),
"Error; should not emit",
[]>;
@@ -1152,7 +1157,7 @@ def LDriw_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def LDw_GP : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst = memw(#$global)",
@@ -1161,7 +1166,7 @@ def LDw_GP : LDInst2<(outs IntRegs:$dst),
// Deallocate stack frame.
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
- def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1),
+ def DEALLOCFRAME : LDInst2<(outs), (ins),
"deallocframe",
[]>;
}
@@ -1393,7 +1398,7 @@ def STrid_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def STd_GP : STInst2<(outs),
(ins globaladdress:$global, DoubleRegs:$src),
"memd(#$global) = $src",
@@ -1435,8 +1440,8 @@ def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
//===----------------------------------------------------------------------===//
multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : STInst2<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($addr) = $src2",
@@ -1444,8 +1449,8 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
@@ -1460,9 +1465,9 @@ multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
let CextOpcode = CextOp, BaseOpcode = CextOp in {
let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
isPredicable = 1 in
- def #NAME# : STInst2<(outs),
+ def NAME : STInst2<(outs),
(ins MEMri:$addr, RC:$src),
- #mnemonic#"($addr) = $src",
+ mnemonic#"($addr) = $src",
[]>;
let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
@@ -1501,8 +1506,8 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
//===----------------------------------------------------------------------===//
multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : STInst2<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($src2+#$src3) = $src4",
@@ -1511,8 +1516,8 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true"), isPredicated = 1 in {
- defm _c#NAME# : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+ defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
@@ -1528,9 +1533,9 @@ multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
isPredicable = 1 in
- def #NAME# : STInst2<(outs),
+ def NAME : STInst2<(outs),
(ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- #mnemonic#"($src1+#$src2) = $src3",
+ mnemonic#"($src1+#$src2) = $src3",
[]>;
let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
@@ -1583,7 +1588,7 @@ def STrib_GP : STInst2<(outs),
Requires<[NoV4T]>;
// memb(#global)=Rt
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def STb_GP : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memb(#$global) = $src",
@@ -1623,7 +1628,7 @@ def STrih_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def STh_GP : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memh(#$global) = $src",
@@ -1672,7 +1677,7 @@ def STriw_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-let neverHasSideEffects = 1 in
+let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
def STw_GP : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memw(#$global) = $src",
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index ae407db029..372de9a8b2 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -12,10 +12,14 @@
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in
-def IMMEXT : Immext<(outs), (ins),
- "/* immext #... */",
- []>,
- Requires<[HasV4T]>;
+class T_Immext<dag ins> :
+ EXTENDERInst<(outs), ins, "immext(#$imm)", []>,
+ Requires<[HasV4T]>;
+
+def IMMEXT_b : T_Immext<(ins brtarget:$imm)>;
+def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
+def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
+def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
@@ -83,86 +87,77 @@ def IMMEXT : Immext<(outs), (ins),
// Shift halfword.
-let isPredicated = 1 in
+let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = aslh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = aslh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = aslh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = aslh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = asrh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = asrh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = asrh($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = asrh($src2)",
[]>,
Requires<[HasV4T]>;
+}
// Sign extend.
-let isPredicated = 1 in
+let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = sxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = sxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = sxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = sxtb($src2)",
@@ -170,94 +165,86 @@ def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = sxth($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = sxth($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = sxth($src2)",
[]>,
Requires<[HasV4T]>;
-let isPredicated = 1 in
def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = sxth($src2)",
[]>,
Requires<[HasV4T]>;
+}
// Zero exten.
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in {
def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = zxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = zxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = zxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = zxtb($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1) $dst = zxth($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1) $dst = zxth($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if ($src1.new) $dst = zxth($src2)",
[]>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
"if (!$src1.new) $dst = zxth($src2)",
[]>,
Requires<[HasV4T]>;
+}
// Generate frame index addresses.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let neverHasSideEffects = 1, isReMaterializable = 1,
+isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s32Imm:$offset),
"$dst = add($src1, ##$offset)",
@@ -431,8 +418,8 @@ def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
// addressing mode
multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : LDInst2<(outs RC:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
@@ -440,10 +427,10 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+ defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
}
}
@@ -451,7 +438,7 @@ let neverHasSideEffects = 1 in
multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
let isPredicable = 1 in
- def #NAME#_V4 : LDInst2<(outs RC:$dst),
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
"$dst = "#mnemonic#"($src1+$src2<<#$offset)",
[]>, Requires<[HasV4T]>;
@@ -1054,7 +1041,7 @@ def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins globaladdress:$global),
"$dst=memd(#$global)",
@@ -1062,7 +1049,8 @@ def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memd(##$global)",
@@ -1071,7 +1059,6 @@ def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
// if (!Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memd(##$global)",
@@ -1079,7 +1066,6 @@ def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memd(##$global)",
@@ -1088,14 +1074,14 @@ def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
// if (!Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memd(##$global)",
[]>,
Requires<[HasV4T]>;
+}
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst=memb(#$global)",
@@ -1103,7 +1089,8 @@ def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memb(##$global)",
@@ -1111,7 +1098,6 @@ def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memb(##$global)",
@@ -1119,7 +1105,6 @@ def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memb(##$global)",
@@ -1127,14 +1112,14 @@ def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memb(##$global)",
[]>,
Requires<[HasV4T]>;
+}
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst=memub(#$global)",
@@ -1142,7 +1127,8 @@ def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memub(##$global)",
@@ -1151,7 +1137,6 @@ def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
// if (!Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memub(##$global)",
@@ -1159,7 +1144,6 @@ def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memub(##$global)",
@@ -1168,14 +1152,14 @@ def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
// if (!Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memub(##$global)",
[]>,
Requires<[HasV4T]>;
+}
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst=memh(#$global)",
@@ -1183,7 +1167,8 @@ def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memh(##$global)",
@@ -1191,7 +1176,6 @@ def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memh(##$global)",
@@ -1199,7 +1183,6 @@ def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memh(##$global)",
@@ -1207,14 +1190,14 @@ def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memh(##$global)",
[]>,
Requires<[HasV4T]>;
+}
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst=memuh(#$global)",
@@ -1222,7 +1205,8 @@ def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memuh(##$global)",
@@ -1230,7 +1214,6 @@ def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memuh(##$global)",
@@ -1238,7 +1221,6 @@ def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memuh(##$global)",
@@ -1246,14 +1228,14 @@ def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (!Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memuh(##$global)",
[]>,
Requires<[HasV4T]>;
+}
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
(ins globaladdress:$global),
"$dst=memw(#$global)",
@@ -1261,7 +1243,8 @@ def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
+validSubTargets = HasV4SubT in {
def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1) $dst=memw(##$global)",
@@ -1270,7 +1253,6 @@ def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
// if (!Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1) $dst=memw(##$global)",
@@ -1278,7 +1260,6 @@ def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if ($src1.new) $dst=memw(##$global)",
@@ -1287,13 +1268,12 @@ def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
// if (!Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$global),
"if (!$src1.new) $dst=memw(##$global)",
[]>,
Requires<[HasV4T]>;
-
+}
def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
@@ -1538,8 +1518,8 @@ def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
// mode
multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : STInst2<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
RC:$src5),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1549,10 +1529,10 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+ defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
}
}
@@ -1560,9 +1540,9 @@ let isNVStorable = 1 in
multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
let isPredicable = 1 in
- def #NAME#_V4 : STInst2<(outs),
+ def NAME#_V4 : STInst2<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
- #mnemonic#"($src1+$src2<<#$src3) = $src4",
+ mnemonic#"($src1+$src2<<#$src3) = $src4",
[]>,
Requires<[HasV4T]>;
@@ -1577,8 +1557,8 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
// addressing mode.
multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME#_nv_V4 : NVInst_V4<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
RC:$src5),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1588,10 +1568,10 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
}
}
@@ -1599,9 +1579,9 @@ let mayStore = 1, isNVStore = 1 in
multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
let isPredicable = 1 in
- def #NAME#_nv_V4 : NVInst_V4<(outs),
+ def NAME#_nv_V4 : NVInst_V4<(outs),
(ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
- #mnemonic#"($src1+$src2<<#$src3) = $src4.new",
+ mnemonic#"($src1+$src2<<#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
@@ -1702,20 +1682,20 @@ def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
// addressing mode and immediate stored value.
multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : STInst2<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
- #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
}
multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+ defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
}
}
@@ -1723,9 +1703,9 @@ let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
- def #NAME#_V4 : STInst2<(outs),
+ def NAME#_V4 : STInst2<(outs),
(ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
- #mnemonic#"($src1+#$src2) = #$src3",
+ mnemonic#"($src1+#$src2) = #$src3",
[]>,
Requires<[HasV4T]>;
@@ -2376,8 +2356,8 @@ def : Pat<(store (i32 IntRegs:$src1),
//
multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
Operand predImmOp, bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME#_nv_V4 : NVInst_V4<(outs),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($src2+#$src3) = $src4.new",
@@ -2387,10 +2367,10 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+ defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
}
}
@@ -2402,9 +2382,9 @@ multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
isPredicable = 1 in
- def #NAME#_nv_V4 : NVInst_V4<(outs),
+ def NAME#_nv_V4 : NVInst_V4<(outs),
(ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- #mnemonic#"($src1+#$src2) = $src3.new",
+ mnemonic#"($src1+#$src2) = $src3.new",
[]>,
Requires<[HasV4T]>;
@@ -2425,16 +2405,56 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
u6_2Ext, 13, 8>, AddrModeRel;
}
-// Store new-value byte.
+// multiclass for new-value store instructions with base + immediate offset.
+// and MEMri operand.
+multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
+
+ // Predicate new
+ defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// memb(Re=#U6)=Nt.new
-// memb(Rs+#s11:0)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memb($addr) = $src1.new",
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src.new",
[]>,
Requires<[HasV4T]>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT,
+mayStore = 1 in {
+ defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+}
+
// memb(Ru<<#u2+#U6)=Nt.new
let mayStore = 1, AddedComplexity = 10 in
def STrib_shl_nv_V4 : NVInst_V4<(outs),
@@ -2473,44 +2493,6 @@ def STb_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
-// Store new-value byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Nt.new
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
// if (Pv) memb(Rx++#s4:0)=Nt.new
let mayStore = 1, hasCtrlDep = 1,
@@ -2548,16 +2530,6 @@ def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
[],"$src3 = $dst">,
Requires<[HasV4T]>;
-
-// Store new-value halfword.
-// memh(Re=#U6)=Nt.new
-// memh(Rs+#s11:1)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memh($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
// memh(Ru<<#u2+#U6)=Nt.new
let mayStore = 1, AddedComplexity = 10 in
def STrih_shl_nv_V4 : NVInst_V4<(outs),
@@ -2597,47 +2569,6 @@ def STh_GP_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
-// Store new-value halfword conditionally.
-
-// if ([!]Pv[.new]) memh(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new
// if (Pv) memh(Rx++#s4:1)=Nt.new
let mayStore = 1, hasCtrlDep = 1,
@@ -2675,18 +2606,6 @@ def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
[],"$src3 = $dst">,
Requires<[HasV4T]>;
-
-// Store new-value word.
-
-// memw(Re=#U6)=Nt.new
-// memw(Rs+#s11:2)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STriw_nv_V4 : NVInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Ru<<#u2+#U6)=Nt.new
let mayStore = 1, AddedComplexity = 10 in
def STriw_shl_nv_V4 : NVInst_V4<(outs),
@@ -2723,47 +2642,6 @@ def STw_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
-// Store new-value word conditionally.
-
-// if ([!]Pv[.new]) memw(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
// if (Pv) memw(Rx++#s4:2)=Nt.new
let mayStore = 1, hasCtrlDep = 1,
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index dc644ad213..db36ac0110 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -16,7 +16,7 @@
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 7195c4a8d3..576f1d7d07 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -45,7 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2985a5667b..d1882de432 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -25,14 +25,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index a4445cb4b1..34bf4eacfd 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -15,8 +15,8 @@
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
@@ -51,7 +51,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
unsigned Idx = 1;
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
++AI, ++Idx) {
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
Argument* Arg = AI;
if (!isa<PointerType>(Arg->getType())) {
for (Instruction::use_iterator UI = Arg->use_begin();
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 8fc836b458..287b3d615b 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -16,7 +16,7 @@
#include "HexagonISelLowering.h"
#include "HexagonMachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
@@ -74,8 +74,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(&Subtarget.getInstrItineraryData()) {
setMCUseCFI(false);
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index fa669a2719..cf8f9aa361 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -19,9 +19,8 @@
#include "HexagonInstrInfo.h"
#include "HexagonSelectionDAGInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -71,14 +68,6 @@ public:
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
-
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 2c1145f171..993fcfaed4 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -14,10 +14,10 @@
#include "HexagonTargetObjectFile.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index ab6f6cf899..409a243f1c 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -241,8 +241,9 @@ static bool IsIndirectCall(MachineInstr* MI) {
// reservation fail.
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -259,7 +260,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
assert(QII->isExtended(MI) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
MF->DeleteMachineInstr(PseudoMI);
@@ -270,8 +271,9 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
// true, otherwise, return false.
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 7aa5dd3b89..40f6c8d23e 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index eb6c779f45..f3a9c1c3e7 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index 813767ba6d..4a7d8e8d88 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -2,7 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMBlazeAsmParser
- MBlazeAsmLexer.cpp
MBlazeAsmParser.cpp
)
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
deleted file mode 100644
index 480e79f9f5..0000000000
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/MBlazeBaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <map>
-#include <string>
-
-using namespace llvm;
-
-namespace {
-
- class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
- protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
- public:
- MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
- };
-
- class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
- public:
- MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI)
- : MBlazeBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
- };
-}
-
-AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return AsmToken(lexedToken);
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID) {
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- } else {
- return AsmToken(lexedToken);
- }
- }
- }
-}
-
-extern "C" void LLVMInitializeMBlazeAsmLexer() {
- RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
-}
-
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 2016600b9d..d73c20f197 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -35,7 +35,8 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseRegister();
+ MBlazeOperand *ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc);
MBlazeOperand *ParseImmediate();
MBlazeOperand *ParseFsl();
MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -383,23 +384,31 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- return (ParseRegister(RegNo) == 0);
+ MBlazeOperand *Reg = ParseRegister(StartLoc, EndLoc);
+ if (!Reg)
+ return true;
+ RegNo = Reg->getReg();
+ return false;
}
-MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
- SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+MBlazeOperand *MBlazeAsmParser::ParseRegister() {
+ SMLoc S, E;
+ return ParseRegister(S, E);
+}
- switch (getLexer().getKind()) {
- default: return 0;
- case AsmToken::Identifier:
- RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
- if (RegNo == 0)
- return 0;
+MBlazeOperand *MBlazeAsmParser::ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
- getLexer().Lex();
- return MBlazeOperand::CreateReg(RegNo, S, E);
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return 0;
+
+ unsigned RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, StartLoc, EndLoc);
}
static unsigned MatchFslRegister(StringRef String) {
@@ -415,7 +424,7 @@ static unsigned MatchFslRegister(StringRef String) {
MBlazeOperand *MBlazeAsmParser::ParseFsl() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
switch (getLexer().getKind()) {
default: return 0;
@@ -432,7 +441,7 @@ MBlazeOperand *MBlazeAsmParser::ParseFsl() {
MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
const MCExpr *EVal;
switch (getLexer().getKind()) {
@@ -454,8 +463,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
MBlazeOperand *Op;
// Attempt to parse the next token as a register name
- unsigned RegNo;
- Op = ParseRegister(RegNo);
+ Op = ParseRegister();
// Attempt to parse the next token as an FSL immediate
if (!Op)
@@ -532,7 +540,7 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getParser().ParseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -548,12 +556,9 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-extern "C" void LLVMInitializeMBlazeAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmParser() {
RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
- LLVMInitializeMBlazeAsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 0bf93d71da..91a41f39b5 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index c9a46a7361..c03ab3803b 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -14,7 +14,6 @@
#include "MBlazeDisassembler.h"
#include "MBlaze.h"
-#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -25,7 +24,6 @@
// #include "MBlazeGenDecoderTables.inc"
// #include "MBlazeGenRegisterNames.inc"
-#include "MBlazeGenEDInfo.inc"
namespace llvm {
extern const MCInstrDesc MBlazeInsts[];
@@ -491,10 +489,6 @@ static unsigned getOPCODE(uint32_t insn) {
}
}
-const EDInstInfo *MBlazeDisassembler::getEDInfo() const {
- return instInfoMBlaze;
-}
-
//
// Public interface for the disassembler
//
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index 5c4ae3b1ac..b8ff8f6072 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -23,8 +23,6 @@ class MCInst;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
class MBlazeDisassembler : public MCDisassembler {
public:
@@ -44,9 +42,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index bb2c31a33a..586e2d3eef 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,4 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_dependencies(LLVMMBlazeAsmPrinter intrinsics_gen MBlazeCommonTableGen)
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0f6f108aea..7dafaef0af 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -26,14 +26,14 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index 99ffa413e6..b6edbba2a7 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 98257a08d3..78ad24debb 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -23,15 +23,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Type.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 06c150571d..8a9f0922b1 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -18,7 +18,6 @@
#include "MBlazeSubtarget.h"
#include "MBlazeTargetMachine.h"
#include "MBlazeTargetObjectFile.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,10 +25,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 5238ad0040..8d262a01e7 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "MBlazeIntrinsicInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Type.h"
#include <cstring>
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index 457c206fd0..ad414ac40f 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 7fa3b2f785..ed06cc4b72 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -24,8 +24,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -34,7 +35,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
#define GET_REGINFO_TARGET_DESC
#include "MBlazeGenRegisterInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 70eb9bac6b..bcdd32fed9 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -42,8 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 891a57bef5..956794ddda 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -20,11 +20,10 @@
#include "MBlazeIntrinsicInfo.h"
#include "MBlazeSelectionDAGInfo.h"
#include "MBlazeSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,8 +37,6 @@ namespace llvm {
MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -75,11 +72,6 @@ namespace llvm {
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const
- { return &STTI; }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const
- { return &VTTI; }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 8a5c13cb1b..a7a0a68b16 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -9,9 +9,9 @@
#include "MBlazeTargetObjectFile.h"
#include "MBlazeSubtarget.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index bd2013dee3..6f9752c429 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -54,7 +54,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -88,7 +88,7 @@ bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME: Is this right? It's what the "generic" code was doing before,
// but is X86 specific. Is it actually true for MBlaze also, or was it
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index 83c2a7d34d..512ce9a081 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -15,8 +15,7 @@ BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \
MBlazeGenAsmWriter.inc \
MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
- MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \
- MBlazeGenEDInfo.inc
+ MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
index 71210d8db4..323a7f647d 100644
--- a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index 99457b924c..64ac994b7f 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_dependencies(LLVMMSP430AsmPrinter intrinsics_gen MSP430CommonTableGen)
+add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 4a08a1ff9f..0a04e5ddb7 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -24,13 +24,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 1b32688489..aef45d8141 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index ff109615b1..1566c09603 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -13,17 +13,17 @@
#include "MSP430.h"
#include "MSP430TargetMachine.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 8a1bd0958b..5a156c15c7 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -18,7 +18,6 @@
#include "MSP430MachineFunctionInfo.h"
#include "MSP430Subtarget.h"
#include "MSP430TargetMachine.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,11 +26,12 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1063,6 +1063,10 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
}
+bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index bf021eaac5..ab4e64e921 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -116,6 +116,7 @@ namespace llvm {
/// out to 16 bits.
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c99f1f895e..a6b5f2f6d0 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index a2782e39d9..8f7813ad46 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 062c119410..164e351df9 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { }
+ FrameLowering(Subtarget) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 8a94d746de..be695a2111 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -21,10 +21,9 @@
#include "MSP430RegisterInfo.h"
#include "MSP430SelectionDAGInfo.h"
#include "MSP430Subtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -64,12 +61,6 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 8b3e01ecf5..0d71d04ebe 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index c3e83725d6..edfd421d85 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -14,9 +14,9 @@
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 59fefa1268..57338df53c 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -84,15 +84,30 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
- int tryParseRegister(StringRef Mnemonic);
+ int tryParseRegister(bool is64BitReg);
bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -128,9 +143,9 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
- int matchRegisterName(StringRef Symbol);
+ int matchRegisterName(StringRef Symbol, bool is64BitReg);
- int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic);
+ int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
void setFpFormat(FpFormatTy Format) {
FpFormat = Format;
@@ -166,6 +181,20 @@ namespace {
/// instruction.
class MipsOperand : public MCParsedAsmOperand {
+public:
+ enum RegisterKind {
+ Kind_None,
+ Kind_CPURegs,
+ Kind_CPU64Regs,
+ Kind_HWRegs,
+ Kind_HW64Regs,
+ Kind_FGR32Regs,
+ Kind_FGR64Regs,
+ Kind_AFGR32Regs,
+ Kind_CCRRegs
+ };
+
+private:
enum KindTy {
k_CondCode,
k_CoprocNum,
@@ -186,6 +215,7 @@ class MipsOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
+ RegisterKind Kind;
} Reg;
struct {
@@ -246,6 +276,11 @@ public:
return Reg.RegNum;
}
+ void setRegKind(RegisterKind RegKind) {
+ assert((Kind == k_Register) && "Invalid access!");
+ Reg.Kind = RegKind;
+ }
+
const MCExpr *getImm() const {
assert((Kind == k_Immediate) && "Invalid access!");
return Imm.Val;
@@ -296,6 +331,45 @@ public:
return Op;
}
+ bool isCPURegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPURegs;
+ }
+ void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCPU64RegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPU64Regs;
+ }
+ void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHWRegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HWRegs;
+ }
+ void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHW64RegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HW64Regs;
+ }
+ void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ void addCCRAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCCRAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_CCRRegs;
+ }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -344,31 +418,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
if ( 0 <= ImmValue && ImmValue <= 65535) {
// for 0 <= j <= 65535.
// li d,j => ori d,$zero,j
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ( ImmValue < 0 && ImmValue >= -32768) {
// for -32768 <= j < 0.
// li d,j => addiu d,$zero,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
// for any other value of j that is representable as a 32-bit integer.
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -390,7 +464,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
if ( -32768 <= ImmValue && ImmValue <= 65535) {
//for -32768 <= j <= 65535.
//la d,j(s) => addiu d,s,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
@@ -400,12 +474,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
//la d,j(s) => lui d,hi16(j)
// ori d,d,lo16(j)
// addu d,d,s
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -433,19 +507,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
//for any other value of j that is representable as a 32-bit integer.
//la d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -498,10 +572,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-int MipsAsmParser::matchRegisterName(StringRef Name) {
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
int CC;
- if (!isMips64())
+ if (!is64BitReg)
CC = StringSwitch<unsigned>(Name)
.Case("zero", Mips::ZERO)
.Case("a0", Mips::A0)
@@ -643,7 +717,7 @@ unsigned MipsAsmParser::getATReg() {
unsigned Reg = Options.getATRegNum();
if (isMips64())
return getReg(Mips::CPU64RegsRegClassID,Reg);
-
+
return getReg(Mips::CPURegsRegClassID,Reg);
}
@@ -651,58 +725,36 @@ unsigned MipsAsmParser::getReg(int RC,int RegNo) {
return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
}
-int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) {
-
- if (Mnemonic.lower() == "rdhwr") {
- // at the moment only hwreg29 is supported
- if (RegNum != 29)
- return -1;
- return Mips::HWR29;
- }
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
if (RegNum > 31)
return -1;
- // MIPS64 registers are numbered 1 after the 32-bit equivalents
- return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64();
+ return getReg(RegClass, RegNum);
}
-int MipsAsmParser::tryParseRegister(StringRef Mnemonic) {
+int MipsAsmParser::tryParseRegister(bool is64BitReg) {
const AsmToken &Tok = Parser.getTok();
int RegNum = -1;
if (Tok.is(AsmToken::Identifier)) {
std::string lowerCase = Tok.getString().lower();
- RegNum = matchRegisterName(lowerCase);
+ RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- Mnemonic.lower());
- else
- return RegNum; //error
- // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64
- if (isMips64() && RegNum == Mips::ZERO_64) {
- if (Mnemonic.find("ddiv") != StringRef::npos)
- RegNum = Mips::ZERO;
- }
+ is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
return RegNum;
}
bool MipsAsmParser::
tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic){
+ bool is64BitReg){
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
- // FIXME: we should make a more generic method for CCR
- if ((Mnemonic == "cfc1" || Mnemonic == "ctc1")
- && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){
- RegNo = Parser.getTok().getIntVal(); // get the int value
- // at the moment only fcc0 is supported
- if (RegNo == 0)
- RegNo = Mips::FCC0;
- } else
- RegNo = tryParseRegister(Mnemonic);
+ RegNo = tryParseRegister(is64BitReg);
if (RegNo == -1)
return true;
@@ -734,7 +786,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat dollar token.
// parse register operand
- if (!tryParseRegisterOperand(Operands, Mnemonic)) {
+ if (!tryParseRegisterOperand(Operands, isMips64())) {
if (getLexer().is(AsmToken::LParen)) {
// check if it is indexed addressing operand
Operands.push_back(MipsOperand::CreateToken("(", S));
@@ -743,7 +795,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
Parser.Lex(); // eat dollar
- if (tryParseRegisterOperand(Operands, Mnemonic))
+ if (tryParseRegisterOperand(Operands, isMips64()))
return true;
if (!getLexer().is(AsmToken::RParen))
@@ -868,7 +920,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = tryParseRegister("");
+ RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -907,7 +959,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1);
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
@@ -920,7 +972,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok1 = Parser.getTok(); // get next token
if (Tok1.is(AsmToken::Dollar)) {
Parser.Lex(); // Eat '$' token.
- if (tryParseRegisterOperand(Operands,"")) {
+ if (tryParseRegisterOperand(Operands, isMips64())) {
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return MatchOperand_ParseFail;
}
@@ -954,6 +1006,126 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if(!tryParseRegisterOperand(Operands, true)) {
+ // set the proper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPU64Regs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if(!tryParseRegisterOperand(Operands, false)) {
+ // set the propper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPURegs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ //if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ unsigned RegNum;
+ //if the first token is not '$' we have error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.is(AsmToken::Integer)) {
+ RegNum = Tok.getIntVal();
+ // at the moment only fcc0 is supported
+ if (RegNum != 0)
+ return MatchOperand_ParseFail;
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // at the moment only fcc0 is supported
+ if (Tok.getIdentifier() != "fcc0")
+ return MatchOperand_ParseFail;
+ } else
+ return MatchOperand_NoMatch;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_CCRRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ef56e752b2..d6fac0ce59 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
add_public_tablegen_target(MipsCommonTableGen)
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 099a1e79a5..1efeffd328 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -14,7 +14,6 @@
#include "Mips.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -23,9 +22,6 @@
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
-// Not a normal header, this must come last.
-#include "MipsGenEDInfo.inc"
-
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
@@ -43,9 +39,6 @@ public:
virtual ~MipsDisassemblerBase() {}
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-
const MCRegisterInfo *getRegInfo() const { return RegInfo; }
private:
@@ -93,10 +86,6 @@ public:
} // end anonymous namespace
-const EDInstInfo *MipsDisassemblerBase::getEDInfo() const {
- return instInfoMips;
-}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
@@ -139,11 +128,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
-
static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -470,17 +454,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- //Currently only hardware register 29 is supported
- if (RegNo != 29)
- return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c3f4a6e1be..3e9fbf1c55 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_dependencies(LLVMMipsAsmPrinter intrinsics_gen MipsCommonTableGen)
+add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 68d3ac5f3b..97c367fbf1 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -149,6 +149,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
OS << ')';
}
+void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNo).getReg());
+}
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 3d8a6f918f..38cac68801 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -87,6 +87,7 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 9242a0bcc9..8e97d527d7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -38,6 +38,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_4:
case FK_Data_8:
case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPREL16:
case Mips::fixup_Mips_GPOFF_HI:
case Mips::fixup_Mips_GPOFF_LO:
case Mips::fixup_Mips_GOT_PAGE:
@@ -214,7 +215,7 @@ public:
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
assert(0 && "RelaxInstruction() unimplemented");
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 9d67aa1856..a6797492d8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -24,6 +24,10 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
(TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
+ if ((TheTriple.getArch() == Triple::mips64el) ||
+ (TheTriple.getArch() == Triple::mips64))
+ PointerSize = 8;
+
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
@@ -34,7 +38,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
WeakRefDirective = "\t.weak\t";
-
+ DebugLabelSuffix = "=.";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
index d39a60d41c..fe0cabc923 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
@@ -48,7 +48,7 @@ static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Mask = Saved[2].getOperand(2).getReg();
assert((Mips::SP == Addr) && "Unexpected register at stack guard");
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
Out.EmitInstruction(Saved[1]);
EmitMask(Out, Addr, Mask);
Out.EmitBundleUnlock();
@@ -60,8 +60,7 @@ static void EmitDirectGuardCall(int I, MCInst Saved[],
// sfi_nops_to_force_slot2
assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
"Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
Out.EmitInstruction(Saved[1]);
Out.EmitInstruction(Saved[2]);
Out.EmitBundleUnlock();
@@ -78,8 +77,7 @@ static void EmitIndirectGuardCall(int I, MCInst Saved[],
unsigned Addr = Saved[0].getOperand(0).getReg();
unsigned Mask = Saved[0].getOperand(2).getReg();
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
EmitMask(Out, Addr, Mask);
Out.EmitInstruction(Saved[1]);
Out.EmitInstruction(Saved[2]);
@@ -95,7 +93,7 @@ static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Addr = Saved[0].getOperand(0).getReg();
unsigned Mask = Saved[0].getOperand(2).getReg();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitMask(Out, Addr, Mask);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -110,7 +108,7 @@ static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Reg = Saved[0].getOperand(0).getReg();
unsigned Mask = Saved[0].getOperand(2).getReg();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitMask(Out, Reg, Mask);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
@@ -125,7 +123,7 @@ static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
unsigned Reg = Saved[0].getOperand(0).getReg();
unsigned Mask = Saved[0].getOperand(2).getReg();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
EmitMask(Out, Reg, Mask);
Out.EmitInstruction(Saved[1]);
Out.EmitBundleUnlock();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 936097137d..7237a1d3fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -131,7 +131,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ // @LOCALMOD-BEGIN
+ MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter,
+ RelaxAll, NoExecStack);
+ if (TheTriple.isOSNaCl())
+ Streamer->EmitBundleAlignMode(4);
+ return Streamer;
+ // @LOCALMOD-END
}
extern "C" void LLVMInitializeMipsTargetMC() {
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index bd8c517345..bcf951e861 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
- MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
+ MipsGenDisassemblerTables.inc \
MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index d17c1259e5..127fcf28a5 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -13,14 +13,15 @@
#include "Mips16FrameLowering.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16InstrInfo.h"
#include "MipsInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
@@ -29,8 +30,8 @@ using namespace llvm;
void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -38,9 +39,35 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
// Adjust stack.
- if (isInt<16>(-StackSize))
- BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+ TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP, -8);
+ SrcML = MachineLocation(Mips::S1);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -12);
+ SrcML = MachineLocation(Mips::S0);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -4);
+ SrcML = MachineLocation(Mips::RA);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
if (hasFP(MF))
BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
@@ -52,8 +79,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -65,9 +92,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(Mips::S0);
// Adjust stack.
- if (isInt<16>(StackSize))
- // assumes stacksize multiple of 8
- BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+ // assumes stacksize multiple of 8
+ TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI);
}
bool Mips16FrameLowering::
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 10b696fe87..91b5ba0800 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -19,11 +19,19 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+static cl::opt<bool> NeverUseSaveRestore(
+ "mips16-never-use-save-restore",
+ cl::init(false),
+ cl::desc("For testing ability to adjust stack pointer without save/restore instruction"),
+ cl::Hidden);
+
+
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm, Mips::BimmX16),
RI(*tm.getSubtargetImpl(), *this) {}
@@ -160,20 +168,135 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
return 0;
}
+// Adjust SP by FrameSize bytes. Save RA, S0, S1
+void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize);
+ else {
+ int Base = 2040; // should create template function like isUInt that returns largest
+ // possible n bit unsigned integer
+ int64_t Remainder = FrameSize - Base;
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base);
+ if (isInt<16>(-Remainder))
+ BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(-Remainder);
+ else
+ adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1);
+ }
+
+ }
+ else {
+ //
+ // sw ra, -4[sp]
+ // sw s1, -8[sp]
+ // sw s0, -12[sp]
+
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::RA);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1);
+ }
+}
+
+// Adjust SP by FrameSize bytes. Restore RA, S0, S1
+void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize);
+ else {
+ int Base = 2040; // should create template function like isUInt that returns largest
+ // possible n bit unsigned integer
+ int64_t Remainder = FrameSize - Base;
+ if (isInt<16>(Remainder))
+ BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder);
+ else
+ adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base);
+ }
+ }
+ else {
+ adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1);
+ // lw ra, -4[sp]
+ // lw s1, -8[sp]
+ // lw s0, -12[sp]
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::A0);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::RA);
+ MIB0.addReg(Mips::A0);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ }
+
+}
+
+// Adjust SP by Amount bytes where bytes can be up to 32bit number.
+// This can only be called at times that we know that there is at least one free register.
+// This is clearly safe at prologue and epilogue.
+//
+void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+// MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+// unsigned Reg1 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+// unsigned Reg2 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+ //
+ // li reg1, constant
+ // move reg2, sp
+ // add reg1, reg1, reg2
+ // move sp, reg1
+ //
+ //
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1);
+ MIB1.addImm(Amount);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2);
+ MIB2.addReg(Mips::SP, RegState::Kill);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1);
+ MIB3.addReg(Reg1);
+ MIB3.addReg(Reg2, RegState::Kill);
+ MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::SP);
+ MIB4.addReg(Reg1, RegState::Kill);
+}
+
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ assert(false && "adjust stack pointer amount exceeded");
+}
+
/// Adjust SP by Amount bytes.
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- if (isInt<16>(Amount)) {
- if (Amount < 0)
- BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)). addImm(-Amount);
- else if (Amount > 0)
- BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount);
- }
+ if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
+ BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Amount);
else
- // not implemented for large values yet
- assert(false && "adjust stack pointer amount exceeded");
+ adjustStackPtrBigUnrestricted(SP, Amount, MBB, I);
+}
+
+/// This function generates the sequence of instructions needed to get the
+/// result of adding register REG and immediate IMM.
+unsigned
+Mips16InstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const {
+
+ return 0;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index d6ef8d2bf1..3704e2573f 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -64,15 +64,43 @@ public:
virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+ // Adjust SP by FrameSize bytes. Save RA, S0, S1
+ void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Adjust SP by FrameSize bytes. Restore RA, S0, S1
+ void restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ /// Emit a series of instructions to load an immediate. If NewImm is a
+ /// non-NULL parameter, the last instruction is not emitted, but instead
+ /// its immediate operand is returned in NewImm.
+ unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned *NewImm) const;
+
private:
virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+
};
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 1e560c6049..e8e2f3ce41 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -57,13 +57,17 @@ class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
class FEXT_I816_ins_base<bits<3> _func, string asmstr,
string asmstr2, InstrItinClass itin>:
- FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2),
+ FEXT_I816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
[], itin>;
class FEXT_I816_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+class FEXT_I816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$$sp, $imm", itin>;
+
//
// Assembler formats in alphabetical order.
// Natural and pseudos are mixed together.
@@ -352,6 +356,18 @@ class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
let Constraints = "$rd = $rd_";
}
+//
+// 32 bit constant
+//
+def imm32: Operand<i32>;
+
+def Constant32:
+ MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
+
+def LwConstant32:
+ MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm),
+ "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
+
//
// Some general instruction class info
@@ -404,6 +420,18 @@ def AddiuRxRyOffMemX16:
// To add a constant to the program counter.
//
def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+
+//
+// Format: ADDIU sp, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended)
+// To add a constant to the stack pointer.
+//
+def AddiuSpImmX16
+ : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+}
+
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
@@ -576,7 +604,9 @@ def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
// Purpose: Load Word (SP-Relative, Extended)
// To load an SP-relative word from memory as a signed value.
//
-def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad;
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{
+ let Uses = [SP];
+}
//
// Format: MOVE r32, rz MIPS16e
@@ -688,6 +718,8 @@ def RestoreRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [S0, S1, RA, SP];
+ let Uses = [SP];
}
// Use Restore to increment SP since SP is not a Mip 16 register, this
@@ -698,6 +730,8 @@ def RestoreIncSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [SP];
+ let Uses = [SP];
}
//
@@ -712,6 +746,8 @@ def SaveRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [RA, SP, S0, S1];
+ let Defs = [SP];
}
//
@@ -723,6 +759,8 @@ def SaveDecSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [SP];
+ let Defs = [SP];
}
//
// Format: SB ry, offset(rx) MIPS16e
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 6b87ecb08c..c2e09a7206 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -24,9 +24,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -35,7 +36,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
@@ -43,6 +43,32 @@ Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
const Mips16InstrInfo &I)
: MipsRegisterInfo(ST), TII(I) {}
+bool Mips16RegisterInfo::requiresRegisterScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+bool Mips16RegisterInfo::requiresFrameIndexScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+
+bool Mips16RegisterInfo::useFPForScavengingIndex
+ (const MachineFunction &MF) const {
+ return false;
+}
+
+bool Mips16RegisterInfo::saveScavengerRegister
+ (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ DebugLoc DL;
+ TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
+ TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
+ return true;
+}
+
// This function eliminate ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
void Mips16RegisterInfo::
@@ -120,6 +146,10 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+ if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) ||
+ ((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) {
+ llvm_unreachable("frame offset does not fit in instruction");
+ }
MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 153def20d0..6101739637 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -22,11 +22,25 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
const Mips16InstrInfo &TII;
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII);
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const Mips16InstrInfo &TII);
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 1c852e2fc4..bbeb6498bc 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -34,48 +34,36 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-// Shifts
-// 64-bit shift instructions.
let DecoderNamespace = "Mips64" in {
-class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt,
- CPU64Regs>;
-
-// Mul, Div
-class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-
-multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStdEnc]> {
+
+multiclass Atomic2Ops64<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
-multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass AtomicCmpSwap64<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
}
let usesCustomInserter = 1, Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
- defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
- defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">;
- defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">;
- defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">;
- defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">;
- defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">;
- defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">;
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
}
//===----------------------------------------------------------------------===//
@@ -83,140 +71,147 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
//===----------------------------------------------------------------------===//
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16,
- CPU64Regs>;
-def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
- CPU64Regs>, IsAsCheapAsAMove;
-def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
-def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
-def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
-def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
-def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
-def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>;
+def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>;
+def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>,
+ ADDI_FM<0x19>, IsAsCheapAsAMove;
+def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xa>;
+def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xb>;
+def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithOverflowR<0x00, 0x2C, "dadd", IIAlu, CPU64Regs, 1>;
-def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
-def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
-def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
-def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
-def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
-def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
-def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
-def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>;
+def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x2d>;
+def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x2f>;
+def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>;
+def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>;
+def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>;
-def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>;
-def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
-def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
-def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
-def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
-let Pattern = []<dag> in {
- def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
- def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
- def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
-}
+def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>,
+ SRA_FM<0x38, 0>;
+def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>,
+ SRA_FM<0x3a, 0>;
+def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>,
+ SRA_FM<0x3b, 0>;
+def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>;
+def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>;
+def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>;
}
// Rotate Instructions
let Predicates = [HasMips64r2, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
- def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
+ def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>,
+ SRA_FM<0x3a, 1>;
+ def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, SRLV_FM<0x16, 1>;
}
let DecoderNamespace = "Mips64" in {
/// Load and Store Instructions
/// aligned
-defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
-defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
-defm LH64 : LoadM64<0x21, "lh", sextloadi16>;
-defm LHu64 : LoadM64<0x25, "lhu", zextloadi16>;
-defm LW64 : LoadM64<0x23, "lw", sextloadi32>;
-defm LWu64 : LoadM64<0x27, "lwu", zextloadi32>;
-defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
-defm SH64 : StoreM64<0x29, "sh", truncstorei16>;
-defm SW64 : StoreM64<0x2b, "sw", truncstorei32>;
-defm LD : LoadM64<0x37, "ld", load>;
-defm SD : StoreM64<0x3f, "sd", store>;
+defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>;
+defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>;
+defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>;
+defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>;
+defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>;
+defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>;
+defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>;
+defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>;
+defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>;
+defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>;
+defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>;
/// load/store left/right
-let isCodeGenOnly = 1 in {
- defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>;
- defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>;
- defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>;
- defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>;
-}
-defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>;
-defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>;
-defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>;
-defm SDR : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
+defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
+defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
+defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
+defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
+
+defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>;
+defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>;
+defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>;
+defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
-def LLD : LLBase<0x34, "lld", CPU64Regs, mem>,
- Requires<[NotN64, HasStdEnc]>;
-def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let isCodeGenOnly = 1;
+let Predicates = [NotN64, HasStdEnc] in {
+ def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>;
+ def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>;
}
-def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>,
- Requires<[NotN64, HasStdEnc]>;
-def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let isCodeGenOnly = 1;
+
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
+ def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>;
+ def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>;
}
/// Jump and Branch Instructions
-def JR64 : IndirectBranch<CPU64Regs>;
-def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
-def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
-def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
-def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
-def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>;
-def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+def JR64 : IndirectBranch<CPU64Regs>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", seteq, CPU64Regs>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", setne, CPU64Regs>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", setge, CPU64Regs>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", setgt, CPU64Regs>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", setle, CPU64Regs>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>;
}
let DecoderNamespace = "Mips64" in
-def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
-def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall;
+def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM;
+def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
let DecoderNamespace = "Mips64" in {
/// Multiply and Divide Instructions.
-def DMULT : Mult64<0x1c, "dmult", IIImul>;
-def DMULTu : Mult64<0x1d, "dmultu", IIImul>;
-def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
-def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
-
-def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>;
-def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>;
-def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>;
-def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>;
+def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>;
+def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>;
+def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1e>;
+def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1f>;
+
+def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", CPU64Regs, [HI64]>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", CPU64Regs, [LO64]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>;
-def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>;
+def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>;
+def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>;
-def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
+def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>;
+def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>;
/// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
-def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
+def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>;
+def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>;
-def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
let DecoderNamespace = "Mips64" in {
-def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>;
let Pattern = []<dag> in {
- def DEXTU : ExtBase<2, "dextu", CPU64Regs>;
- def DEXTM : ExtBase<1, "dextm", CPU64Regs>;
+ def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>;
+ def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>;
}
-def DINS : InsBase<7, "dins", CPU64Regs>;
+def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>;
let Pattern = []<dag> in {
- def DINSU : InsBase<6, "dinsu", CPU64Regs>;
- def DINSM : InsBase<5, "dinsm", CPU64Regs>;
+ def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>;
+ def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>;
}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -310,34 +305,56 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
+def : InstAlias<"move $dst,$src", (DADDu CPU64RegsOpnd:$dst,
+ CPU64RegsOpnd:$src,ZERO_64)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"not $rt, $rs", (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs)>, Requires<[HasMips64]>;
+def : InstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>;
+def : InstAlias<"dadd $rs, $rt, $imm",
+ (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>;
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64" in {
-def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">;
-def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">;
-def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">;
-def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">;
-def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">;
-def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">;
+def MFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+ "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
+def MTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+ "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
+def MFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+ "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
+def MTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+ "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
+def DMFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+ "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
+def DMTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+ "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
+def DMFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+ "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
+def DMTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+ "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
}
// Two operand (implicit 0 selector) versions:
def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc0 $rt, $rd",
+ (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc0 $rt, $rd",
+ (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc2 $rt, $rd",
+ (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+def : InstAlias<"dmtc2 $rt, $rd",
+ (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8269ec6a61..1d1ab71962 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -22,15 +22,15 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/DataLayout.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 932e6b3d1e..52fa95b182 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -28,8 +28,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 457d238a01..559370bc2a 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -16,42 +16,37 @@
// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
// conditional move instructions.
// cond:int, data:int
-class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct,
- string instr_asm> :
- FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
- let shamt = 0;
+class CMov_I_I_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $rd";
}
// cond:int, data:float
-class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt,
- bits<6> func, string instr_asm> :
- FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> {
- bits<5> rt;
- let ft = rt;
+class CMov_I_F_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $fd";
}
// cond:float, data:int
-class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf,
- string instr_asm> :
- FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"),
- [(set RC:$rd, (cmov RC:$rs, RC:$F))]> {
- let cc = 0;
+class CMov_F_I_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $rd";
}
// cond:float, data:float
-class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
- string instr_asm> :
- FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"),
- [(set RC:$fd, (cmov RC:$fs, RC:$F))]> {
- let cc = 0;
+class CMov_F_F_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $fd";
}
@@ -106,81 +101,103 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
}
// Instantiation of instructions.
-def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
+def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa>;
let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
- def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
- def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> {
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
}
-def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
+def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb>;
let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
- def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
- def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> {
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
}
-def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
-def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
- Requires<[HasMips64, HasStdEnc]> {
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>;
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
-def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
- Requires<[HasMips64, HasStdEnc]> {
+def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>;
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
- def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
}
let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
- def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17> {
let isCodeGenOnly = 1;
}
- def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">;
- def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> {
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17> {
let isCodeGenOnly = 1;
}
}
-def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
-def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
- Requires<[HasMips64, HasStdEnc]> {
+def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>;
+def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>,
+ CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
-def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
- Requires<[HasMips64, HasStdEnc]> {
+def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>;
+def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>,
+ CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
-def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<16, 1>;
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<16, 0>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
// Instantiation of conditional move patterns.
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 8e01d06596..a72a763fde 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -24,8 +24,9 @@ class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
let Predicates = [HasDSP];
}
-class PseudoDSP<dag outs, dag ins, list<dag> pattern>:
- MipsPseudo<outs, ins, "", pattern> {
+class PseudoDSP<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
let Predicates = [HasDSP];
}
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index ef9402865b..9531b91487 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -75,10 +75,6 @@ def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
// Flags.
-class IsCommutable {
- bit isCommutable = 1;
-}
-
class UseAC {
list<Register> Uses = [AC0];
}
@@ -490,7 +486,7 @@ class MULT_DESC_BASE<string instr_asm> {
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
- MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> {
+ MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
list<Register> Uses = [DSPCtrl];
bit usesCustomInserter = 1;
}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index e3cdab427d..eb9d49fefb 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 0a8c0b7994..0a7031acad 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -26,15 +26,15 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Type.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 89b9cd150e..1a78913efe 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -21,7 +21,6 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,10 +28,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,6 +50,13 @@ static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+
+
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -198,6 +205,41 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+void MipsTargetLowering::setMips16HardFloatLibCalls() {
+ setLibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setLibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setLibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setLibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setLibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setLibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setLibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setLibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setLibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setLibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setLibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setLibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setLibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setLibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setLibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setLibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setLibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setLibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setLibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setLibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setLibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setLibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setLibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setLibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
@@ -218,6 +260,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
if (Subtarget->inMips16Mode()) {
addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
}
if (Subtarget->hasDSP()) {
@@ -488,7 +532,9 @@ MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i32;
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
}
// SelectMadd -
@@ -1020,7 +1066,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
static unsigned
AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
- assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
@@ -2234,7 +2279,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
@@ -2932,12 +2977,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool GlobalOrExternal = false;
+ bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
- if (G->getGlobal()->hasInternalLinkage())
+ InternalLinkage = G->getGlobal()->hasInternalLinkage();
+
+ if (InternalLinkage)
Callee = getAddrLocal(Callee, DAG, HasMips64);
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
@@ -2984,8 +3031,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// Insert node "GP copy globalreg" before call to function.
- // Lazy-binding stubs require GP to point to the GOT.
- if (IsPICCall) {
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty)));
@@ -3134,7 +3184,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = &Mips::CPURegsRegClass;
+ RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass :
+ &Mips::CPURegsRegClass;
else if (RegVT == MVT::i64)
RC = &Mips::CPU64RegsRegClass;
else if (RegVT == MVT::f32)
@@ -3559,7 +3610,8 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
}
EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (Subtarget->hasMips64())
@@ -3738,7 +3790,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
return;
// Copy arg registers.
- EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
@@ -3860,7 +3912,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
const CCState &CCInfo = CC.getCCInfo();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
unsigned RegSize = CC.regSize();
- EVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ MVT RegTy = MVT::getIntegerVT(RegSize * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 08165a036c..12b01e919e 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -174,6 +174,8 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
+ void setMips16HardFloatLibCalls();
+
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
unsigned FirstIdx; // Index of the first register used.
@@ -368,7 +370,8 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 5daed320d2..f6d82daad6 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -86,273 +86,320 @@ def fpimm0neg : PatLeaf<(fpimm), [{
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-// FP load.
-let DecoderMethod = "DecodeFMem" in {
-class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs RC:$ft), (ins MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load addr:$addr))],
- IILoad>;
-
-// FP store.
-class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
- IIStore>;
-}
-// FP indexed load.
-class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp = null_frag>:
- FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
- let fs = 0;
-}
-
-// FP indexed store.
-class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp= null_frag>:
- FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
- let fd = 0;
-}
-
-// Instructions that convert an FP value to 32-bit fixed point.
-multiclass FFR1_W_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
- def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
+class ADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> {
+ let isCommutable = IsComm;
+}
+
+multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode = null_frag> {
+ def _D32 : ADDS_FT<opstr, AFGR64, Itin, IsComm, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
+ def _D64 : ADDS_FT<opstr, FGR64, Itin, IsComm, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
+ string DecoderNamespace = "Mips64";
}
}
-// Instructions that convert an FP value to 64-bit fixed point.
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in
-multiclass FFR1_L_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
- def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
-}
+class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>;
-// FP-to-FP conversion instructions.
-multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
- def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
- def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
+multiclass ABSS_M<string opstr, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> {
+ def _D32 : ABSS_FT<opstr, AFGR64, AFGR64, Itin, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
+ def _D64 : ABSS_FT<opstr, FGR64, FGR64, Itin, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
+ string DecoderNamespace = "Mips64";
}
}
-multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
- let isCommutable = isComm in {
- def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
- def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
+multiclass ROUND_M<string opstr, InstrItinClass Itin> {
+ def _D32 : ABSS_FT<opstr, FGR32, AFGR64, Itin>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
+ def _D64 : ABSS_FT<opstr, FGR32, FGR64, Itin>,
Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-}
-// FP madd/msub/nmadd/nmsub instruction classes.
-class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>;
-
-class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>;
+class MFC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr))], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>;
+
+class NMADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
+ Itin, FrmFR>;
+
+class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
+ [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI>;
+
+class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
+ [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI>;
+
+class BC1F_FT<string opstr, InstrItinClass Itin,
+ SDPatternOperator Op = null_frag> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(MipsFPBrcond Op, bb:$offset)], Itin, FrmFI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+ let Uses = [FCR31];
+}
+
+class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond),
+ !strconcat("c.$cond.", typestr, "\t$fs, $ft"),
+ [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> {
+ let Defs = [FCR31];
+}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-defm ROUND_W : FFR1_W_M<0xc, "round">;
-defm ROUND_L : FFR1_L_M<0x8, "round">;
-defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
-defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
-defm CEIL_W : FFR1_W_M<0xe, "ceil">;
-defm CEIL_L : FFR1_L_M<0xa, "ceil">;
-defm FLOOR_W : FFR1_W_M<0xf, "floor">;
-defm FLOOR_L : FFR1_L_M<0xb, "floor">;
-defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects;
-//defm CVT_L : FFR1_L_M<0x25, "cvt">;
-
-def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects;
-def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects;
-def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects;
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>;
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>;
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>;
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>;
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>,
+ NeverHasSideEffects;
+
+defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
+defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>;
+defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>;
+defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
+defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>,
+ NeverHasSideEffects;
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>;
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x8, 17>;
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>;
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x9, 17>;
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>;
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>;
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>;
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0xb, 17>;
+}
+
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>;
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>,
+ NeverHasSideEffects;
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>,
+ NeverHasSideEffects;
let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in {
- def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
- def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
- def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
}
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64",
neverHasSideEffects = 1 in {
- def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
- def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
- def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
- def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
- def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>;
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>;
}
let Predicates = [NoNaNsFPMath, HasStdEnc] in {
- defm FABS : FFR1P_M<0x5, "abs", fabs>;
- defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+ def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>;
+ def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>;
+ defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
+ defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
}
-defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>,
+ ABSS_FM<0x4, 16>;
+defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
// When defining instructions, we reference all 32-bit registers,
// regardless of register aliasing.
-class FFRGPR<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern>:
- FFR<0x11, 0x0, _fmt, outs, ins, asmstr, pattern> {
- bits<5> rt;
- let ft = rt;
- let fd = 0;
-}
-
/// Move Control Registers From/To CPU Registers
-def CFC1 : FFRGPR<0x2, (outs CPURegs:$rt), (ins CCR:$fs),
- "cfc1\t$rt, $fs", []>;
-
-def CTC1 : FFRGPR<0x6, (outs CCR:$fs), (ins CPURegs:$rt),
- "ctc1\t$rt, $fs", []>;
-
-def MFC1 : FFRGPR<0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
- "mfc1\t$rt, $fs",
- [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
-
-def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
- "mtc1\t$rt, $fs",
- [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
-
-def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs),
- "dmfc1\t$rt, $fs",
- [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>;
-
-def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
- "dmtc1\t$rt, $fs",
- [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>;
-
-def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
-def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
+def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>;
+def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>;
+def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>;
+def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>;
+def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>;
+def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>;
+
+def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>;
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>,
Requires<[NotFP64bit, HasStdEnc]>;
-def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>,
Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
- def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
- def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> {
+ def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>;
+ def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>;
+ def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> {
let isCodeGenOnly =1;
}
- def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> {
+ def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> {
let isCodeGenOnly =1;
}
}
let Predicates = [NotN64, HasStdEnc] in {
- def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
- def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
+ def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>;
}
let Predicates = [NotN64, HasMips64, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
- def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
+ def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
- def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
+ def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
// Indexed loads and stores.
let Predicates = [HasFPIdx, IsNotNaCl/*@LOCALMOD*/] in {
- def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>;
- def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>;
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>;
}
let Predicates = [HasMips32r2, NotMips64, IsNotNaCl/*@LOCALMOD*/] in {
- def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>;
- def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>;
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
-let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/],
- DecoderNamespace="Mips64" in {
- def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>;
- def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>;
+let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/], DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
// n64
let Predicates = [IsN64, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in {
- def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>;
- def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>;
- def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>;
- def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store>;
+ def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>;
+ def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>,
+ LWXC1_FM<1>;
+ def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>,
+ SWXC1_FM<8>;
+ def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>,
+ SWXC1_FM<9>;
}
// Load/store doubleword indexed unaligned.
let Predicates = [NotMips64, HasStdEnc] in {
- def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
let Predicates = [HasMips64, HasStdEnc],
DecoderNamespace="Mips64" in {
- def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
- def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
/// Floating-point Aritmetic
-defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
-defm FDIV : FFR2P_M<0x03, "div", fdiv>;
-defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
-defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>;
+defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
+def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>;
+defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
+def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>;
+defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
+def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>;
+defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
let Predicates = [HasMips32r2, HasStdEnc] in {
- def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
- def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
+ def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>;
+ def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>;
}
let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
- def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>;
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>;
}
let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
- def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
- def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
- def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
- def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
- def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
isCodeGenOnly=1 in {
- def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
- def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
//===----------------------------------------------------------------------===//
@@ -363,19 +410,9 @@ let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-/// Floating Point Branch of False/True (Likely)
-let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
- class FBRANCH<bits<1> nd, bits<1> tf, PatLeaf op, string asmstr> :
- FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
- [(MipsFPBrcond op, bb:$dst)]> {
- let Inst{20-18} = 0;
- let Inst{17} = nd;
- let Inst{16} = tf;
-}
-
let DecoderMethod = "DecodeBC1" in {
-def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">;
-def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">;
+def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>;
+def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>;
}
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -399,33 +436,24 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
-class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
- FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc),
- !strconcat("c.$cc.", typestr, "\t$fs, $ft"),
- [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>;
-
/// Floating Point Compare
-let Defs=[FCR31] in {
- def FCMP_S32 : FCMP<0x10, FGR32, "s">;
- def FCMP_D32 : FCMP<0x11, AFGR64, "d">,
- Requires<[NotFP64bit, HasStdEnc]>;
- def FCMP_D64 : FCMP<0x11, FGR64, "d">,
- Requires<[IsFP64bit, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- }
-}
+def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+let DecoderNamespace = "Mips64" in
+def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
PseudoSE<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
+ (ins CPURegs:$lo, CPURegs:$hi),
[(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
@@ -433,7 +461,7 @@ def BuildPairF64 :
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n),
[(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index c3c12c1fea..c026b5dae0 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -80,15 +80,17 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
}
// Mips Pseudo Instructions Format
-class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+class MipsPseudo<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo> :
+ MipsInst<outs, ins, "", pattern, itin, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
// Mips32/64 Pseudo Instruction Format
-class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsPseudo<outs, ins, asmstr, pattern> {
+class PseudoSE<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
let Predicates = [HasStdEnc];
}
@@ -161,30 +163,28 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
+class FJ<bits<6> op>
{
- bits<26> addr;
+ bits<26> target;
- let Opcode = op;
+ bits<32> Inst;
- let Inst{25-0} = addr;
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
}
- //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|>
//===----------------------------------------------------------------------===//
-class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
- InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>
+class MFC3OP_FM<bits<6> op, bits<5> mfmt>
{
- bits<5> mfmt;
bits<5> rt;
bits<5> rd;
bits<3> sel;
- let Opcode = op;
- let mfmt = _mfmt;
+ bits<32> Inst;
+ let Inst{31-26} = op;
let Inst{25-21} = mfmt;
let Inst{20-16} = rt;
let Inst{15-11} = rd;
@@ -192,6 +192,275 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
let Inst{2-0} = sel;
}
+class ADD_FM<bits<6> op, bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class ADDI_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class SRA_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-22} = 0;
+ let Inst{21} = rotate;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+class SRLV_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-7} = 0;
+ let Inst{6} = rotate;
+ let Inst{5-0} = funct;
+}
+
+class BEQ_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM<bits<6> op, bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class B_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 4;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = offset;
+}
+
+class SLTI_FM<bits<6> op> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class MFLO_FM<bits<6> funct> {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class MTLO_FM<bits<6> funct> {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class SEB_FM<bits<5> funct, bits<6> funct2> {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = funct;
+ let Inst{5-0} = funct2;
+}
+
+class CLO_FM<bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1c;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+ let rt = rd;
+}
+
+class LUI_FM {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0xf;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class NOP_FM {
+ bits<32> Inst;
+
+ let Inst{31-0} = 0;
+}
+
+class JALR_FM {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 31;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 9;
+}
+
+class BAL_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0x11;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class SYNC_FM {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{10-6} = stype;
+ let Inst{5-0} = 0xf;
+}
+
+class MULT_FM<bits<6> op, bits<6> funct> {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class EXT_FM<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class RDHWR_FM {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0x3b;
+}
+
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
@@ -206,31 +475,6 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
-//===----------------------------------------------------------------------===//
-
-class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
-{
- bits<5> fd;
- bits<5> fs;
- bits<5> ft;
- bits<5> fmt;
- bits<6> funct;
-
- let Opcode = op;
- let funct = _funct;
- let fmt = _fmt;
-
- let Inst{25-21} = fmt;
- let Inst{20-16} = ft;
- let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = funct;
-}
-
-//===----------------------------------------------------------------------===//
// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
//===----------------------------------------------------------------------===//
@@ -248,130 +492,179 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
let Inst{15-0} = imm16;
}
-//===----------------------------------------------------------------------===//
-// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
-//===----------------------------------------------------------------------===//
-
-class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fs;
- bits<5> ft;
- bits<4> cc;
- bits<5> fmt;
+class ADDS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
- let Opcode = 0x11;
- let fmt = _fmt;
+ bits<32> Inst;
+ let Inst{31-26} = 0x11;
let Inst{25-21} = fmt;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = 0;
- let Inst{5-4} = 0b11;
- let Inst{3-0} = cc;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
+class ABSS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
-class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> rd;
- bits<5> rs;
- bits<3> cc;
- bits<1> tf;
-
- let Opcode = 0;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = rs;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
- let Inst{15-11} = rd;
- let Inst{10-6} = 0;
- let Inst{5-0} = 1;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
-class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fd;
- bits<5> fs;
- bits<3> cc;
- bits<5> fmt;
- bits<1> tf;
+class MFC1_FM<bits<5> funct> {
+ bits<5> rt;
+ bits<5> fs;
- let Opcode = 17;
- let fmt = _fmt;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = fmt;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rt;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = 17;
+ let Inst{10-0} = 0;
}
-// FP unary instructions without patterns.
-class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
- let ft = 0;
-}
+class LW_FM<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
-// FP unary instructions with patterns.
-class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
- [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
- let ft = 0;
-}
+ bits<32> Inst;
-class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
- string fmtstr, RegisterClass RC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
- [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
+ let Inst{31-26} = op;
+ let Inst{25-21} = addr{20-16};
+ let Inst{20-16} = rt;
+ let Inst{15-0} = addr{15-0};
+}
-// Floating point madd/msub/nmadd/nmsub.
-class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+class MADDS_FM<bits<3> funct, bits<3> fmt> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
bits<5> ft;
- let Opcode = 0x13;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = fr;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-3} = funct;
- let Inst{2-0} = fmt;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
}
-// FP indexed load/store instructions.
-class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> base;
- bits<5> index;
- bits<5> fs;
- bits<5> fd;
+class LWXC1_FM<bits<6> funct> {
+ bits<5> fd;
+ bits<5> base;
+ bits<5> index;
- let Opcode = 0x13;
+ bits<32> Inst;
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class SWXC1_FM<bits<6> funct> {
+ bits<5> fs;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = base;
let Inst{20-16} = index;
let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class BC1F_FM<bit nd, bit tf> {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = 0x8;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = nd;
+ let Inst{16} = tf;
+ let Inst{15-0} = offset;
+}
+
+class CEQS_FM<bits<5> fmt> {
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cond;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-8} = 0; // cc
+ let Inst{7-4} = 0x3;
+ let Inst{3-0} = cond;
+}
+
+class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = funct;
}
+
+class CMov_F_I_FM<bit tf> {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class CMov_F_F_FM<bits<5> fmt, bit tf> {
+ bits<5> fd;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 0x11;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8f8ab86e70..b81c975a16 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -175,6 +175,10 @@ class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [HasStdEnc];
}
+class IsCommutable {
+ bit isCommutable = 1;
+}
+
class IsBranch {
bit isBranch = 1;
}
@@ -296,6 +300,10 @@ def HI16 : SDNodeXForm<imm, [{
// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+// Node immediate fits as 15-bit sign extended on target immediate.
+// e.g. addi, andi
+def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>;
+
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
// immediate are caught.
@@ -325,104 +333,65 @@ def addr :
// Instructions specific format
//===----------------------------------------------------------------------===//
-/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<0x10, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC0_3OP : MFC3OP<0x10, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc0\t$rt, $rd, $sel">;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
-def MFC2_3OP : MFC3OP<0x12, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC2_3OP : MFC3OP<0x12, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc2\t$rt, $rd, $sel">;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
// Arithmetic and logical instructions with 3 register operands.
-class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
- let shamt = 0;
+class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
let isCommutable = isComm;
let isReMaterializable = 1;
-}
-
-class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
- let shamt = 0;
- let isCommutable = isComm;
+ string BaseOpcode;
+ string Arch;
}
// Arithmetic and logical instructions with 2 register operands.
-class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> {
+class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
+ SDPatternOperator imm_type = null_frag,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
let isReMaterializable = 1;
}
-class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"), [], IIAlu>;
-
// Arithmetic Multiply ADD/SUB
-let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
-class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
- FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
- let rd = 0;
- let shamt = 0;
+class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> :
+ InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
+ !strconcat(opstr, "\t$rs, $rt"),
+ [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> {
+ let Defs = [HI, LO];
+ let Uses = [HI, LO];
let isCommutable = isComm;
}
// Logical
-class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
- let shamt = 0;
+class LogicNOR<string opstr, RegisterOperand RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
let isCommutable = 1;
}
// Shifts
-class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, PatFrag PF, Operand ImmOpnd,
- RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
- !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> {
- let rs = isRotate;
-}
-
-// 32-bit shift instructions.
-class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>;
-
-class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> {
- let shamt = isRotate;
-}
+class shift_rotate_imm<string opstr, Operand ImmOpnd,
+ RegisterOperand RC, SDPatternOperator OpNode = null_frag,
+ SDPatternOperator PF = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"),
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+
+class shift_rotate_reg<string opstr, RegisterOperand RC,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rt, $rs"),
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
// Load Upper Imediate
-class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>:
- FI<op, (outs RC:$rt), (ins Imm:$imm16),
- !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove {
- let rs = 0;
+class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
+ InstSE<(outs RC:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+ [], IIAlu, FrmI>, IsAsCheapAsAMove {
let neverHasSideEffects = 1;
let isReMaterializable = 1;
}
@@ -436,66 +405,34 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
}
// Memory Load/Store
-let canFoldAsLoad = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
- let isPseudo = Pseudo;
-}
-
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(OpNode RC:$rt, addr:$addr)], IIStore> {
- let isPseudo = Pseudo;
-}
-
-// 32-bit load.
-multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let canFoldAsLoad = 1;
}
-// 64-bit load.
-multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
}
-// 32-bit store.
-multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass LoadM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag> {
+ def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 64-bit store.
-multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass StoreM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag> {
+ def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -503,81 +440,58 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
// Load/Store Left/Right
let canFoldAsLoad = 1 in
-class LoadLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd> :
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr, RC:$src))], IILoad> {
+class LoadLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr, RC:$src))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
string Constraints = "$src = $rt";
}
-class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"), [(OpNode RC:$rt, addr:$addr)],
- IIStore>;
-
-// 32-bit load left/right.
-multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-// 64-bit load left/right.
-multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class StoreLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
}
-// 32-bit store left/right.
-multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : LoadLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : LoadLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 64-bit store left/right.
-multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : StoreLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : StoreLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
// Conditional Branch
-class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+class CBranch<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch,
+ FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
let Defs = [AT];
}
-class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
- let rt = _rt;
+class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -585,27 +499,22 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
}
// SetCC
-class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
- IIAlu> {
- let shamt = 0;
-}
+class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
-class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
- PatLeaf imm_type, RegisterClass RC>:
- FI<op, (outs CPURegs:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
- IIAlu>;
+class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
+ RegisterClass RC>:
+ InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>;
// Jump
-class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
- SDPatternOperator operator, SDPatternOperator targetoperator>:
- FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"),
- [(operator targetoperator:$target)], IIBranch> {
+class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
+ SDPatternOperator targetoperator> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(operator targetoperator:$target)], IIBranch, FrmJ> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -614,11 +523,9 @@ class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
}
// Unconditional branch
-class UncondBranch<bits<6> op, string instr_asm>:
- BranchBase<op, (outs), (ins brtarget:$imm16),
- !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
- let rs = 0;
- let rt = 0;
+class UncondBranch<string opstr> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(br bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
@@ -630,11 +537,7 @@ class UncondBranch<bits<6> op, string instr_asm>:
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>:
- FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
-}
+ InstSE<(outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch, FrmR>;
// Indirect branch
class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> {
@@ -652,205 +555,170 @@ class RetBase<RegisterClass RC>: JumpFR<RC> {
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
- class JumpLink<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins calltarget:$target),
- !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
- IIBranch> {
- let DecoderMethod = "DecodeJumpTarget";
- }
-
- class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
- RegisterClass RC>:
- FR<op, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 31;
- let shamt = 0;
+ class JumpLink<string opstr> :
+ InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ let DecoderMethod = "DecodeJumpTarget";
}
- class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
- FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
- let rt = _rt;
- }
+ class JumpLinkReg<string opstr, RegisterClass RC>:
+ InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"),
+ [(MipsJmpLink RC:$rs)], IIBranch, FrmR>;
+
+ class BGEZAL_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+
}
+class BAL_FT :
+ InstSE<(outs), (ins brtarget:$offset), "bal\t$offset", [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
+}
+
+// Sync
+let hasSideEffects = 1 in
+class SYNC_FT :
+ InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)],
+ NoItinerary, FrmOther>;
+
// Mul, Div
-class Mult<bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
- let rd = 0;
- let shamt = 0;
+class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
+ itin, FrmR> {
let isCommutable = 1;
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPURegs, [HI, LO]>;
-
-class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op RC:$rs, RC:$rt)], itin> {
- let rd = 0;
- let shamt = 0;
+class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin,
+ FrmR> {
let Defs = DefRegs;
}
-class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>;
-
// Move from Hi/Lo
-class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> UseRegs>:
- FR<0x00, func, (outs RC:$rd), (ins),
- !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
- let rs = 0;
- let rt = 0;
- let shamt = 0;
+class MoveFromLOHI<string opstr, RegisterClass RC, list<Register> UseRegs>:
+ InstSE<(outs RC:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
let Uses = UseRegs;
let neverHasSideEffects = 1;
}
-class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
+class MoveToLOHI<string opstr, RegisterClass RC, list<Register> DefRegs>:
+ InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<opc, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
- let isCodeGenOnly = 1;
+class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> :
+ InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let isCodeGenOnly = 1;
+ let DecoderMethod = "DecodeMem";
}
// Count Leading Ones/Zeros in Word
-class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
- Requires<[HasBitCount, HasStdEnc]> {
- let shamt = 0;
- let rt = rd;
-}
+class CountLeading0<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
+
+class CountLeading1<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
-class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
- Requires<[HasBitCount, HasStdEnc]> {
- let shamt = 0;
- let rt = rd;
-}
// Sign Extend in Register.
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
- RegisterClass RC>:
- FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"),
- [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
- let rs = 0;
- let shamt = sa;
+class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> :
+ InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"),
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> {
let Predicates = [HasSEInReg, HasStdEnc];
}
// Subword Swap
-class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
- FR<0x1f, func, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
- let rs = 0;
- let shamt = sa;
+class SubwordSwap<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
+ NoItinerary, FrmR> {
let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
- : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
- let rs = 0;
- let shamt = 0;
-}
+class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> :
+ InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
+ IIAlu, FrmR>;
// Ext and Ins
-class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
+class ExtBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR> {
let Predicates = [HasMips32r2, HasStdEnc];
}
-class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt),
- (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
- NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
+class InsBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR> {
let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
- RegisterClass PRC> :
+class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
-multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass Atomic2Ops32<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPURegs, CPURegs>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
- RegisterClass PRC> :
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
-multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStdEnc]> {
+multiclass AtomicCmpSwap32<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$rt), (ins Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IILoad> {
+class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IIStore> {
+class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayStore = 1;
let Constraints = "$rt = $dst";
}
+class MFC3OP<dag outs, dag ins, string asmstr> :
+ InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -859,278 +727,276 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
// Older Macro based SFI Model
def SFI_GUARD_LOADSTORE :
-MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
- "sfi_load_store_preamble\t$dst, $src1, $src2", []>;
+MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
+ "sfi_load_store_preamble\t$dst, $src1, $src2">;
def SFI_GUARD_INDIRECT_CALL :
-MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
- "sfi_indirect_call_preamble\t$dst, $src1, $src2", []>;
+MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
+ "sfi_indirect_call_preamble\t$dst, $src1, $src2">;
def SFI_GUARD_INDIRECT_JMP :
-MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
- "sfi_indirect_jump_preamble\t$dst, $src1, $src2", []>;
+MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
+ "sfi_indirect_jump_preamble\t$dst, $src1, $src2">;
def SFI_GUARD_CALL :
-MipsPseudo<(outs), (ins), "sfi_call_preamble", []>;
+MipsAsmPseudoInst<(outs), (ins), "sfi_call_preamble">;
def SFI_GUARD_RETURN :
-MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
- "sfi_return_preamble\t$dst, $src1, $src2", []>;
+MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
+ "sfi_return_preamble\t$dst, $src1, $src2">;
def SFI_NOP_IF_AT_BUNDLE_END :
-MipsPseudo<(outs), (ins), "sfi_nop_if_at_bundle_end", []>;
+MipsAsmPseudoInst<(outs), (ins), "sfi_nop_if_at_bundle_end">;
def SFI_DATA_MASK :
-MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
- "sfi_data_mask\t$dst, $src1, $src2", []>;
+MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2),
+ "sfi_data_mask\t$dst, $src1, $src2">;
// @LOCALMOD-END
// Return RA.
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
+def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
- "!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-let neverHasSideEffects = 1 in
-def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
-
let usesCustomInserter = 1 in {
- defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
- defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">;
- defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">;
- defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">;
- defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">;
- defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">;
- defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">;
- defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">;
- defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">;
- defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">;
- defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">;
- defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">;
- defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">;
- defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">;
- defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">;
- defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">;
- defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">;
- defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">;
-
- defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">;
- defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">;
- defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">;
-
- defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">;
- defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">;
- defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">;
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
}
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
-
-class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>;
-
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>;
-
-class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>;
-
//===----------------------------------------------------------------------===//
// MipsI Instructions
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>,
- IsAsCheapAsAMove;
-def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
-def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
-def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
-def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
-def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
-def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
-def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>;
+def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+ ADDI_FM<0x9>, IsAsCheapAsAMove;
+def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
+def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
+def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>;
+def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>;
+def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>;
+def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
-def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
-def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
-def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
-def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
-def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
-def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
-def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
-def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
-def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
+def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
+def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
+def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
+def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>;
-def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>;
-def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>;
-def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>;
-def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
-def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
+def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>;
+def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>;
+def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>;
+def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
- def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
+ def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+ SRA_FM<2, 1>;
+ def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
}
/// Load and Store Instructions
/// aligned
-defm LB : LoadM32<0x20, "lb", sextloadi8>;
-defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
-defm LH : LoadM32<0x21, "lh", sextloadi16>;
-defm LHu : LoadM32<0x25, "lhu", zextloadi16>;
-defm LW : LoadM32<0x23, "lw", load>;
-defm SB : StoreM32<0x28, "sb", truncstorei8>;
-defm SH : StoreM32<0x29, "sh", truncstorei16>;
-defm SW : StoreM32<0x2b, "sw", store>;
+defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
+defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>;
+defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
+defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
+defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
/// load/store left/right
-defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>;
-defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>;
-defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
-defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
+defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
+defm LWR : LoadLeftRightM<"lwr", MipsLWR, CPURegs>, LW_FM<0x26>;
+defm SWL : StoreLeftRightM<"swl", MipsSWL, CPURegs>, LW_FM<0x2a>;
+defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>;
-let hasSideEffects = 1 in
-def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
-{
- bits<5> stype;
- let Opcode = 0;
- let Inst{25-11} = 0;
- let Inst{10-6} = stype;
- let Inst{5-0} = 15;
-}
+def SYNC : SYNC_FT, SYNC_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<0x30, "ll", CPURegs, mem>,
- Requires<[NotN64, HasStdEnc]>;
-def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [NotN64, HasStdEnc] in {
+ def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>;
+ def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>;
}
-def SC : SCBase<0x38, "sc", CPURegs, mem>,
- Requires<[NotN64, HasStdEnc]>;
-def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>;
+ def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>;
}
/// Jump and Branch Instructions
-def J : JumpFJ<0x02, jmptarget, "j", br, bb>,
+def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
Requires<[RelocStatic, HasStdEnc]>, IsBranch;
-def JR : IndirectBranch<CPURegs>;
-def B : UncondBranch<0x04, "b">;
-def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
-def BNE : CBranch<0x05, "bne", setne, CPURegs>;
-def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
-def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
-def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
-def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
-
-let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1,
- hasDelaySlot = 1, Defs = [RA] in
-def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>;
-
-def JAL : JumpLink<0x03, "jal">;
-def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
-def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
-def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
-def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall;
-def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall;
-
-def RET : RetBase<CPURegs>;
+def JR : IndirectBranch<CPURegs>, MTLO_FM<8>;
+def B : UncondBranch<"b">, B_FM;
+def BEQ : CBranch<"beq", seteq, CPURegs>, BEQ_FM<4>;
+def BNE : CBranch<"bne", setne, CPURegs>, BEQ_FM<5>;
+def BGEZ : CBranchZero<"bgez", setge, CPURegs>, BGEZ_FM<1, 1>;
+def BGTZ : CBranchZero<"bgtz", setgt, CPURegs>, BGEZ_FM<7, 0>;
+def BLEZ : CBranchZero<"blez", setle, CPURegs>, BGEZ_FM<6, 0>;
+def BLTZ : CBranchZero<"bltz", setlt, CPURegs>, BGEZ_FM<1, 0>;
+
+def BAL_BR: BAL_FT, BAL_FM;
+
+def JAL : JumpLink<"jal">, FJ<3>;
+def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM;
+def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>;
+def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>;
+def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
+def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+
+def RET : RetBase<CPURegs>, MTLO_FM<8>;
/// Multiply and Divide Instructions.
-def MULT : Mult32<0x18, "mult", IIImul>;
-def MULTu : Mult32<0x19, "multu", IIImul>;
-def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>;
-def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
+def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>,
+ MULT_FM<0, 0x1b>;
-def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>;
-def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>;
-def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>;
-def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>;
+def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
+def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
+def MFHI : MoveFromLOHI<"mfhi", CPURegs, [HI]>, MFLO_FM<0x10>;
+def MFLO : MoveFromLOHI<"mflo", CPURegs, [LO]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>;
-def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>;
+def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>;
+def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<0x20, "clz", CPURegs>;
-def CLO : CountLeading1<0x21, "clo", CPURegs>;
+def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>;
+def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>;
+def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>;
-/// No operation
-let addr=0 in
- def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+/// No operation.
+/// FIXME: NOP should be an alias of "sll $0, $0, 0".
+def NOP : InstSE<(outs), (ins), "nop", [], IIAlu, FrmJ>, NOP_FM;
// FrameIndexes are legalized when they are operands from load/store
// instructions. The same not happens for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
+def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MArithR<0, "madd", MipsMAdd, 1>;
-def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
-def MSUB : MArithR<4, "msub", MipsMSub>;
-def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+def MADD : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>;
+def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>;
+
+def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
-// MUL is a assembly macro in the current used ISAs. In recent ISA's
-// it is a real instruction.
-def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
- Requires<[HasStdEnc]>;
+def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>;
+def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>;
-def RDHWR : ReadHardware<CPURegs, HWRegs>;
+/// Move Control Registers From/To CPU Registers
+def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel),
+ "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
+
+def MTC0_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt),
+ "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
+
+def MFC2_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel),
+ "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
-def EXT : ExtBase<0, "ext", CPURegs>;
-def INS : InsBase<4, "ins", CPURegs>;
+def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt),
+ "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>;
-def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>;
-def : InstAlias<"addu $rs,$rt,$imm",
- (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"add $rs,$rt,$imm",
- (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"and $rs,$rt,$imm",
- (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"j $rs", (JR CPURegs:$rs)>;
-def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>;
-def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"slt $rs,$rt,$imm",
- (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"xor $rs,$rt,$imm",
- (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
+def : InstAlias<"move $dst,$src", (ADDu CPURegsOpnd:$dst,
+ CPURegsOpnd:$src,ZERO)>, Requires<[NotMips64]>;
+def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset)>;
+def : InstAlias<"addu $rs, $rt, $imm",
+ (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>;
+def : InstAlias<"add $rs, $rt, $imm",
+ (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO)>;
+def : InstAlias<"neg $rt, $rs", (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs)>;
+def : InstAlias<"negu $rt, $rs", (SUBu CPURegsOpnd:$rt, ZERO,
+ CPURegsOpnd:$rs)>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm)>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>,
+ Requires<[NotMips64]>;
+def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
+def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
+def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
+def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")> ;
+def LoadImm32Reg : LoadImm32<"li", shamt,CPURegsOpnd>;
+
+class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")> ;
+def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>;
+
+class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")> ;
+def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>;
+
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1215,7 +1081,7 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
def : MipsPat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+ (NOR CPURegsOpnd:$in, ZERO)>;
// extended loads
let Predicates = [NotN64, HasStdEnc] in {
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 5fd600ebc9..1b2a325d3c 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -17,7 +17,7 @@
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 1d53a1508e..30f68b156e 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -24,7 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 11eef6591c..0c71596cae 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -13,7 +13,7 @@
#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp
index 1ef5632e09..5e4e5e3b8f 100644
--- a/lib/Target/Mips/MipsNaClRewritePass.cpp
+++ b/lib/Target/Mips/MipsNaClRewritePass.cpp
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 36d8971579..98464c7b10 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -25,8 +25,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -35,7 +36,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f07a10c3dd..c6eb0e1e87 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -331,3 +331,48 @@ def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
// Accumulator Registers
def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+
+def CPURegsAsmOperand : AsmOperandClass {
+ let Name = "CPURegsAsm";
+ let ParserMethod = "parseCPURegs";
+}
+
+def CPU64RegsAsmOperand : AsmOperandClass {
+ let Name = "CPU64RegsAsm";
+ let ParserMethod = "parseCPU64Regs";
+}
+
+def CCRAsmOperand : AsmOperandClass {
+ let Name = "CCRAsm";
+ let ParserMethod = "parseCCRRegs";
+}
+
+def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> {
+ let ParserMatchClass = CPURegsAsmOperand;
+}
+
+def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> {
+ let ParserMatchClass = CPU64RegsAsmOperand;
+}
+
+def CCROpnd : RegisterOperand<CCR, "printCPURegs"> {
+ let ParserMatchClass = CCRAsmOperand;
+}
+
+def HWRegsAsmOperand : AsmOperandClass {
+ let Name = "HWRegsAsm";
+ let ParserMethod = "parseHWRegs";
+}
+
+def HW64RegsAsmOperand : AsmOperandClass {
+ let Name = "HW64RegsAsm";
+ let ParserMethod = "parseHW64Regs";
+}
+
+def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HWRegsAsmOperand;
+}
+
+def HW64RegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HW64RegsAsmOperand;
+}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index f7603a9d74..60b12337d7 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index a714411f22..cd8f9f41d7 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -90,7 +90,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
else if (Mips::CCRRegClass.contains(SrcReg))
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
@@ -120,7 +120,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::MOVCCRToCCR;
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
else if (SrcReg == Mips::HI64)
Opc = Mips::MFHI64, SrcReg = 0;
else if (SrcReg == Mips::LO64)
@@ -144,11 +144,11 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
if (SrcReg)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
}
void MipsSEInstrInfo::
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 44b1827f8d..abeab7b61b 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -25,9 +25,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -36,7 +37,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 83b58a9b53..da6ca892a1 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,15 +45,16 @@ MipsTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS, isLittle, RM),
DL(isLittle ?
(Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") :
(Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo(),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 8b1a06f320..c4928c21eb 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -20,10 +20,10 @@
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
class formatted_raw_ostream;
@@ -32,13 +32,11 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
const DataLayout DL; // Calculates type size & alignment
- const MipsInstrInfo *InstrInfo;
- const MipsFrameLowering *FrameLowering;
+ OwningPtr<const MipsInstrInfo> InstrInfo;
+ OwningPtr<const MipsFrameLowering> FrameLowering;
MipsTargetLowering TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -47,12 +45,12 @@ public:
CodeGenOpt::Level OL,
bool isLittle);
- virtual ~MipsTargetMachine() { delete InstrInfo; }
+ virtual ~MipsTargetMachine() {}
virtual const MipsInstrInfo *getInstrInfo() const
- { return InstrInfo; }
+ { return InstrInfo.get(); }
virtual const TargetFrameLowering *getFrameLowering() const
- { return FrameLowering; }
+ { return FrameLowering.get(); }
virtual const MipsSubtarget *getSubtargetImpl() const
{ return &Subtarget; }
virtual const DataLayout *getDataLayout() const
@@ -72,13 +70,6 @@ public:
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 59d07dae12..4270b79933 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,9 +9,9 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 243632b20a..3615c146a5 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index b29a2962aa..097b50aa4e 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -16,10 +16,10 @@
#define LLVM_TARGET_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Value.h"
#include <cassert>
#include <iosfwd>
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 38c7083fea..60f52a46da 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 768d514dbb..19d73c5783 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,7 +15,7 @@
#define NVPTX_ALLOCA_HOISTING_H_
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index af07576d59..22da8f33d7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -29,12 +29,13 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -164,20 +165,14 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- const Constant *PtrVal = CE->getOperand(0);
- SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (Offset == 0)
+ if (!OffsetAI)
return Base;
- // Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
- Offset = (Offset << SExtAmount) >> SExtAmount;
- }
-
+ int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
@@ -1521,8 +1516,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
continue;
}
- if (PAL.getParamAttributes(paramIndex+1).
- hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) {
// Just a scalar
const PointerType *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 29db4abe6c..42498f0bf7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -21,7 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index d68e3b6871..36ab7f5c15 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -13,8 +13,8 @@
#include "NVPTXISelDAGToDAG.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 29e4f17962..14f2091a3f 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,7 +18,7 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4e9d68687a..b3ab9fc9d0 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -23,13 +23,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Module.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -1022,7 +1022,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
// to newly created nodes. The SDNOdes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
// A plain scalar.
if (isABI || isKernel) {
// If ABI, load from the param symbol
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cd50deb26a..6fe654cb49 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -16,7 +16,7 @@
#include "NVPTXTargetMachine.h"
#define GET_INSTRINFO_CTOR
#include "NVPTXGenInstrInfo.inc"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 8e40c965bf..f7fa7aa61d 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 9bafce274e..286e753fa9 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -16,7 +16,7 @@
#define NVPTX_LOWER_AGGR_COPIES_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index 9c832e1b51..e166be5a68 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -14,7 +14,7 @@
#ifndef LLVM_NVPTXSECTION_H
#define LLVM_NVPTXSECTION_H
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
#include <vector>
@@ -38,6 +38,8 @@ public:
virtual bool isBaseAddressKnownZero() const { return true; }
virtual bool UseCodeAlign() const { return false; }
virtual bool isVirtualSection() const { return false; }
+ virtual std::string getLabelBeginName() const { return ""; }
+ virtual std::string getLabelEndName() const { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 6171292eeb..babe29500d 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -13,10 +13,10 @@
#include "NVPTXSplitBBatBar.h"
#include "NVPTXUtilities.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0867a4e01e..b4e049ea3e 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -35,7 +35,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -72,8 +71,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
- STTI(&TLInfo), VTTI(&TLInfo)
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index b4763a5bae..1a732be1ad 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -21,11 +21,10 @@
#include "NVPTXInstrInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -45,9 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
// Hold Strings that can be free'd all together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
-
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
@@ -76,12 +72,6 @@ public:
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index fff3f43bb0..1ccc9f7c02 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -12,11 +12,11 @@
#include "NVPTXUtilities.h"
#include "NVPTX.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cstring>
#include <map>
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 5ea7bc8640..247e09b8bc 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -14,10 +14,10 @@
#ifndef NVPTXUTILITIES_H
#define NVPTXUTILITIES_H
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Value.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
#include <cstdarg>
#include <set>
#include <string>
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index f3624b9f23..6c801b875e 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
index e67e2e42b3..f1b285dd78 100644
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ b/lib/Target/NVPTX/VectorElementize.cpp
@@ -43,15 +43,15 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
using namespace llvm;
@@ -243,7 +243,7 @@ void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
std::vector<MachineInstr *>& copies) {
copies.push_back(F.CloneMachineInstr(Instr));
- MachineInstr *copy=copies[0];
+ MachineInstrBuilder copy(F, copies[0]);
copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
// Remove the dest, that should be a vector operand.
@@ -260,12 +260,11 @@ void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
copy->RemoveOperand(0);
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
- }
+ for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
+ copy.addReg(scalarRegs[i], RegState::Define);
for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
+ copy.addOperand(otherOperands[i]);
}
@@ -280,7 +279,7 @@ void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
std::vector<MachineInstr *>& copies) {
copies.push_back(F.CloneMachineInstr(Instr));
- MachineInstr *copy=copies[0];
+ MachineInstrBuilder copy(F, copies[0]);
copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
MachineOperand src = copy->getOperand(0);
@@ -297,10 +296,10 @@ void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
copy->RemoveOperand(0);
for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));
+ copy.addReg(scalarRegs[i]);
for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
+ copy.addOperand(otherOperands[i]);
}
///=============================================================================
@@ -327,8 +326,8 @@ void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
DebugLoc DL = Instr->getDebugLoc();
for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
+ MachineInstrBuilder copy =
+ BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
MachineOperand which=Instr->getOperand(3+i);
assert(which.isImm() && "Shuffle operand not a constant");
@@ -336,9 +335,9 @@ void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
int elem=src%numcopies;
if (which.getImm() < numcopies)
- copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
+ copy.addReg(src1[elem]);
else
- copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
+ copy.addReg(src2[elem]);
copies.push_back(copy);
}
}
@@ -358,12 +357,9 @@ void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
assert(which.isImm() && "Extract operand not a constant");
DebugLoc DL = Instr->getDebugLoc();
-
- MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- Instr->getOperand(0).getReg());
- copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));
-
- copies.push_back(copy);
+ copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
+ Instr->getOperand(0).getReg())
+ .addReg(src[which.getImm()]));
}
///=============================================================================
@@ -389,13 +385,13 @@ void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
DebugLoc DL = Instr->getDebugLoc();
for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
+ MachineInstrBuilder copy =
+ BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
if (i != elem)
- copy->addOperand(MachineOperand::CreateReg(src[i], false));
+ copy.addReg(src[i]);
else
- copy->addOperand(Instr->getOperand(2));
+ copy.addOperand(Instr->getOperand(2));
copies.push_back(copy);
}
@@ -418,15 +414,10 @@ void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
DebugLoc DL = Instr->getDebugLoc();
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- copy->addOperand(Instr->getOperand(1+i));
-
- copies.push_back(copy);
- }
-
+ for (unsigned i=0; i<numcopies; i++)
+ copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
+ dest[i])
+ .addOperand(Instr->getOperand(1+i)));
}
///=============================================================================
@@ -439,7 +430,7 @@ void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
std::vector<MachineInstr *>& copies) {
copies.push_back(F.CloneMachineInstr(Instr));
- MachineInstr *copy=copies[0];
+ MachineInstrBuilder copy(F, copies[0]);
copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
// Remove the dest, that should be a vector operand.
@@ -457,10 +448,10 @@ void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
copy->RemoveOperand(0);
for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
+ copy.addReg(scalarRegs[i], RegState::Define);
for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
+ copy.addOperand(otherOperands[i]);
}
///=============================================================================
@@ -504,7 +495,7 @@ void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
copies.push_back(F.CloneMachineInstr(Instr));
for (unsigned i=0; i<numcopies; ++i) {
- MachineInstr *copy = copies[i];
+ MachineInstrBuilder copy(F, copies[i]);
std::vector<MachineOperand> allOperands;
std::vector<bool> isDef;
@@ -530,13 +521,13 @@ void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
if (isVectorRegister(regnum)) {
SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
+ copy.addReg(scalarRegs[i], getDefRegState(isDef[j]));
}
else
- copy->addOperand(oper);
+ copy.addOperand(oper);
}
else
- copy->addOperand(oper);
+ copy.addOperand(oper);
}
}
}
@@ -659,7 +650,7 @@ unsigned VectorElementize::copyProp(MachineFunction &F) {
for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
Instr->RemoveOperand(0);
for (unsigned i=0, e=operands.size(); i!=e; ++i)
- Instr->addOperand(operands[i]);
+ Instr->addOperand(F, operands[i]);
}
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 631f749581..f24edf62ed 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -32,6 +32,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_8:
case PPC::fixup_ppc_toc:
case PPC::fixup_ppc_tlsreg:
+ case PPC::fixup_ppc_nofixup:
return Value;
case PPC::fixup_ppc_lo14:
case PPC::fixup_ppc_toc16_ds:
@@ -85,7 +86,8 @@ public:
{ "fixup_ppc_toc", 0, 64, 0 },
{ "fixup_ppc_toc16", 16, 16, 0 },
{ "fixup_ppc_toc16_ds", 16, 14, 0 },
- { "fixup_ppc_tlsreg", 0, 0, 0 }
+ { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -117,7 +119,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 462d7072b5..d61e741ef5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
@@ -33,9 +34,25 @@ namespace {
const MCFixup &Fixup,
bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+
+ class PPCELFRelocationEntry : public ELFRelocationEntry {
+ public:
+ PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+ bool operator<(const PPCELFRelocationEntry &RE) const {
+ return (RE.r_offset < r_offset ||
+ (RE.r_offset == r_offset && RE.Type > Type));
+ }
};
}
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+ : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+ RE.r_addend, *RE.Fixup) {}
+
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
@@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_br24:
Type = ELF::R_PPC_REL24;
break;
+ case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
}
} else {
switch ((unsigned)Fixup.getKind()) {
@@ -79,12 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
Type = ELF::R_PPC_TPREL16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_HA;
break;
case MCSymbolRefExpr::VK_PPC_TOC16_HA:
Type = ELF::R_PPC64_TOC16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
}
break;
case PPC::fixup_ppc_lo16:
@@ -93,12 +127,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
Type = ELF::R_PPC_TPREL16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_LO;
break;
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
}
break;
case PPC::fixup_ppc_lo14:
@@ -119,14 +162,25 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS:
- Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
break;
}
break;
case PPC::fixup_ppc_tlsreg:
Type = ELF::R_PPC64_TLS;
break;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ Type = ELF::R_PPC64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ Type = ELF::R_PPC64_TLSLD;
+ break;
+ }
+ break;
case FK_Data_8:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -191,6 +245,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
}
}
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset. Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+
+ // Copy to a temporary vector of relocation entries having a different
+ // sort function.
+ std::vector<PPCELFRelocationEntry> TmpRelocs;
+
+ for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+ R != Relocs.end(); ++R) {
+ TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+ }
+
+ // Sort in place by ascending r_offset and descending r_type.
+ array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+ // Copy back to the original vector.
+ unsigned I = 0;
+ for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+ R != TmpRelocs.end(); ++R, ++I) {
+ Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+ R->Symbol, R->r_addend, *R->Fixup);
+ }
+}
+
+
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 75bb851630..7917f7736e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -47,6 +47,10 @@ enum Fixups {
/// fixup_ppc_tlsreg - Insert thread-pointer register number.
fixup_ppc_tlsreg,
+
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models.
+ fixup_ppc_nofixup,
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 5b208d41f4..d048426d43 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -62,8 +63,6 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
@@ -82,11 +81,12 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
- // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the
+ // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the
// following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF)
+ if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF ||
+ Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD)
Size = 8;
// Output the constant in big endian byte order.
@@ -119,6 +119,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
+
+ // For special TLS calls, add another fixup for the symbol. Apparently
+ // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently
+ // similar that TblGen will not generate a separate case for the latter
+ // two, so this is the only way to get the extra fixup generated.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) {
+ const MCOperand &MO2 = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ }
return 0;
}
@@ -199,17 +210,6 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
}
-unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- const MCOperand &MO = MI.getOperand(OpNo);
-
- // Add a fixup for the GOT displacement to the TLS block offset.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16_ds));
- return 0;
-}
-
-
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
@@ -223,7 +223,6 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
return getPPCRegisterNumbering(PPC::X13);
}
-
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index a0e4cf3005..4a420929d0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,6 +14,9 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index f872e861bf..972e13852e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,6 +14,9 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index bbd247f20f..e6d38ebf21 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -72,9 +72,7 @@ namespace llvm {
MO_HA16 = 2 << 5,
MO_TPREL16_HA = 3 << 5,
- MO_TPREL16_LO = 4 << 5,
- MO_GOT_TPREL16_DS = 5 << 5,
- MO_TLS = 6 << 5
+ MO_TPREL16_LO = 4 << 5
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4315bc1a07..839f918a06 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -32,9 +32,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -44,7 +45,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -426,20 +426,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
bool IsExternal = false;
bool IsFunction = false;
bool IsCommon = false;
+ bool IsAvailExt = false;
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
- MOSymbol = Mang->getSymbol(GValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
- IsCommon = GVar && GValue->hasCommonLinkage();
+ IsCommon = GVar && RealGValue->hasCommonLinkage();
IsFunction = !GVar;
+ IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || MO.isJTI())
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -466,10 +471,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
else {
const GlobalValue *GValue = MO.getGlobal();
- MOSymbol = Mang->getSymbol(GValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage())
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
@@ -496,8 +505,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
- MOSymbol = Mang->getSymbol(GValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsFunction = !GVar;
} else if (MO.isCPI())
@@ -513,8 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
return;
}
- case PPC::LDgotTPREL: {
- // Transform %Xd = LDgotTPREL <ga:@sym>, %Xs
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LDrs, which is a form of LD with the offset
@@ -524,12 +552,148 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL: {
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR: {
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL: {
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR: {
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA: {
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL: {
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8L %Xs, sym@dtprel@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
@@ -568,14 +732,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -604,6 +768,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -867,7 +1050,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -877,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -896,7 +1079,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 05db2c5e73..a74932c1e1 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -43,7 +43,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Constants.h"
+#include "llvm/IR/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 0dcb5deb5d..d68bfd12e4 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -68,7 +68,6 @@ namespace {
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -245,13 +244,6 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
}
-unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
- llvm_unreachable("TLS not supported on the old JIT.");
- return 0;
-}
-
-
unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
unsigned OpNo) const {
llvm_unreachable("TLS not supported on the old JIT.");
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index fdd85ff11f..5901f36a2f 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -198,8 +198,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoRedZone);
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
// FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
// still generate stackless code if all local vars are reg-allocated.
// Try: (FrameSize <= 224
@@ -261,7 +261,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index abcaa9cab0..762b3467c3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -21,11 +21,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -1296,14 +1297,18 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
- assert((GVar || isa<Function>(GValue)) &&
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ assert((GVar || isa<Function>(RealGValue)) &&
"Unexpected global value subclass!");
// An external variable is one without an initializer. For these,
// for variables with common linkage, and for Functions, generate
// the LDtocL form.
- if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage())
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
@@ -1311,11 +1316,6 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
- case PPCISD::LD_GOT_TPREL: {
- assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
- return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64,
- N->getOperand(0), N->getOperand(1));
- }
}
return SelectCode(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1168171b23..9966b2cc90 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17,7 +17,6 @@
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -25,10 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -376,6 +376,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -442,6 +443,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -578,8 +581,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
- case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
}
}
@@ -1342,18 +1354,65 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (!is64bit)
llvm_unreachable("only local-exec is currently supported for ppc32");
- if (Model != TLSModel::InitialExec)
- llvm_unreachable("only local-exec and initial-exec TLS modes supported");
-
- SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_GOT_TPREL16_DS);
- SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLS);
- SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
- SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
- GOTOffset, GOTReg);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
+
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
+
+ llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -6763,8 +6822,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
- !MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::Naked);
+ !MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
(is31 ? PPC::R31 : PPC::R1);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
@@ -6787,16 +6846,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index f54a8b77a4..12b3df7c9a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -178,11 +178,16 @@ namespace llvm {
CR6SET,
CR6UNSET,
- /// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym@got@tprel@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
/// TLS model, produces a LD instruction with base register G8RReg
- /// and offset sym@got@tprel. The latter identifies the GOT entry
- /// containing the offset of "sym" relative to the thread pointer.
- LD_GOT_TPREL,
+ /// and offset sym@got@tprel@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
/// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
/// model, produces an ADD instruction that adds the contents of
@@ -192,6 +197,46 @@ namespace llvm {
/// TLS sequence.
ADD_TLS,
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsgd@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsgd@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsld@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsld@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@dtprel@l.
+ ADDI_DTPREL_L,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -386,16 +431,15 @@ namespace llvm {
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index dff15664a3..1dd5415733 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -37,12 +37,10 @@ def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64
let EncoderMethod = "getMemRIXEncoding";
let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
}
-def tlsaddr : Operand<i64> {
- let EncoderMethod = "getTLSOffsetEncoding";
-}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
}
+def tlsgd : Operand<i64> {}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -110,6 +108,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
+ let isCodeGenOnly = 1 in
+ def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ let isCodeGenOnly = 1 in
+ def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
@@ -373,7 +381,7 @@ def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB", IntSimple,
+ "add $rT, $rA, $rB@tls", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
@@ -709,13 +717,60 @@ def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
(PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
// Support for thread-local storage.
-def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg),
- "#LDgotTPREL",
- [(set G8RC:$rD,
- (PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>,
- isPPC64;
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+ "#ADDISgotTprelHA",
+ [(set G8RC:$rD,
+ (PPCaddisGotTprelHA G8RC:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg),
+ "#LDgotTprelL",
+ [(set G8RC:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>,
+ isPPC64;
def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
(ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+ "#ADDIStlsgdHA",
+ [(set G8RC:$rD,
+ (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+ "#ADDItlsgdL",
+ [(set G8RC:$rD,
+ (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set G8RC:$rD,
+ (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+ "#ADDIStlsldHA",
+ [(set G8RC:$rD,
+ (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+ "#ADDItlsldL",
+ [(set G8RC:$rD,
+ (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set G8RC:$rD,
+ (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+ "#ADDISdtprelHA",
+ [(set G8RC:$rD,
+ (PPCaddisDtprelHA G8RC:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+ "#ADDIdtprelL",
+ [(set G8RC:$rD,
+ (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
let PPC970_Unit = 2 in {
// Truncating stores.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a29f40ad53..8c077b7517 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -91,8 +91,19 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
-def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 62d15b371f..851de1778a 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -15,7 +15,7 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 1c4af901f7..73f7a2cfd5 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -114,12 +114,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
- case PPCII::MO_GOT_TPREL16_DS:
- RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS;
- break;
- case PPCII::MO_TLS:
- RefKind = MCSymbolRefExpr::VK_PPC_TLS;
- break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 8d1ba23c11..378c147331 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -21,7 +21,6 @@
#include "PPCSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -29,8 +28,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -40,7 +41,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
#include <cstdlib>
#define GET_REGINFO_TARGET_DESC
@@ -597,7 +597,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 71eff4c52a..d9b4e3061e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -14,7 +14,7 @@
#include "PPCSubtarget.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 22a78c5f1f..b8b7882ac0 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 0267ed1f23..d917d99ded 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -20,9 +20,8 @@
#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,12 +63,6 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568d83..fa44331b8a 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
new file mode 100644
index 0000000000..0f5125d39b
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.h
@@ -0,0 +1,49 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+
+// SI Passes
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
+
+// Passes common to R600 and SI
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+namespace ShaderType {
+ enum Type {
+ PIXEL = 0,
+ VERTEX = 1,
+ GEOMETRY = 2,
+ COMPUTE = 3
+ };
+}
+
+#endif // AMDGPU_H
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
new file mode 100644
index 0000000000..40f474161a
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.td
@@ -0,0 +1,40 @@
+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+
+
+def AMDGPUInstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDGPUAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyWriters = [AMDGPUAsmWriter];
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
new file mode 100644
index 0000000000..754506c837
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,138 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
+/// code. When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+
+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ return new AMDGPUAsmPrinter(tm, Streamer);
+}
+
+extern "C" void LLVMInitializeR600AsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+}
+
+/// We need to override this function so we can avoid
+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+ }
+ SetupMachineFunction(MF);
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ EmitProgramInfo(MF);
+ }
+ EmitFunctionBody();
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ switch (reg) {
+ default: break;
+ case AMDGPU::EXEC:
+ case AMDGPU::SI_LITERAL_CONSTANT:
+ case AMDGPU::SREG_LIT_0:
+ case AMDGPU::M0:
+ continue;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else {
+ assert(!"Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg);
+ maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ MaxSGPR += 2;
+ }
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+ OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
+}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
new file mode 100644
index 0000000000..3812282b17
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -0,0 +1,44 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_ASMPRINTER_H
+#define AMDGPU_ASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// \brief Emit register usage information so that the GPU driver
+ /// can correctly setup the GPU state.
+ void EmitProgramInfo(MachineFunction &MF);
+
+ /// Implemented in AMDGPUMCInstLower.cpp
+ virtual void EmitInstruction(const MachineInstr *MI);
+};
+
+} // End anonymous llvm
+
+#endif //AMDGPU_ASMPRINTER_H
diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h
new file mode 100644
index 0000000000..84f3588496
--- /dev/null
+++ b/lib/Target/R600/AMDGPUCodeEmitter.h
@@ -0,0 +1,49 @@
+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+namespace llvm {
+
+class AMDGPUCodeEmitter {
+public:
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const { return 0; }
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
+ const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
new file mode 100644
index 0000000000..50297d1f60
--- /dev/null
+++ b/lib/Target/R600/AMDGPUConvertToISA.cpp
@@ -0,0 +1,62 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers AMDIL machine instructions to the appropriate
+/// hardware instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
new file mode 100644
index 0000000000..473dac4ddc
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -0,0 +1,417 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This is the parent TargetLowering class for hardware code gen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Initialize target lowering borrowed from AMDIL
+ InitAMDILLowering();
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ // Lower floating point store/load to integer store/load to reduce the number
+ // of patterns in tablegen.
+ setOperationAction(ISD::STORE, MVT::f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ InVals.push_back(SDValue());
+ }
+ return Chain;
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ // AMDIL DAG lowering
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ // AMDGPU DAG lowering
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+ return Op;
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_mad:
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ }
+}
+
+///IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2),
+ OneSubAC);
+}
+
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+
+ if (VT != MVT::f32 ||
+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2:
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2:
+ case ISD::SETUO:
+ case ISD::SETO:
+ assert(0 && "Operation should already be optimised !");
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETUGT:
+ case ISD::SETOGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ }
+ case ISD::SETCC_INVALID:
+ assert(0 && "Invalid setcc condcode !");
+ }
+ return Op;
+}
+
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+ // RCP_LO = umulo(RCP, Den) */
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+ // RCP_HI = mulhu (RCP, Den) */
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+ SDValue Ops[2];
+ Ops[0] = Div;
+ Ops[1] = Rem;
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ // AMDIL DAG nodes
+ NODE_NAME_CASE(MAD);
+ NODE_NAME_CASE(CALL);
+ NODE_NAME_CASE(UMUL);
+ NODE_NAME_CASE(DIV_INF);
+ NODE_NAME_CASE(RET_FLAG);
+ NODE_NAME_CASE(BRANCH_COND);
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(INTERP)
+ NODE_NAME_CASE(INTERP_P0)
+ NODE_NAME_CASE(EXPORT)
+ }
+}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
new file mode 100644
index 0000000000..c7abaf69b4
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -0,0 +1,144 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition of the TargetLowering class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering {
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+// Functions defined in AMDILISelLowering.cpp
+public:
+
+ /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
+ /// bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+
+ /// We want to mark f32/f64 floating point values as legal.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// We don't want to shrink f64/f32 constants.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+private:
+ void InitAMDILLowering();
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+};
+
+namespace AMDGPUISD {
+
+enum {
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ MAD, // 32bit Fused Multiply Add instruction
+ CALL, // Function call based on a single integer
+ UMUL, // 32bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ BITALIGN,
+ DWORDADDR,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ INTERP,
+ INTERP_P0,
+ EXPORT,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+namespace SIISD {
+
+enum {
+ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
+ VCC_AND,
+ VCC_BITCAST
+};
+
+} // End namespace SIISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
new file mode 100644
index 0000000000..e42a46d839
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -0,0 +1,257 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implementation of the TargetInstrInfo class that is common to all
+/// AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+// TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+// TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+// TODO: Implement this function
+ return NULL;
+}
+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ case AMDGPU::BRANCH:
+ return true;
+ };
+ ++iter;
+ }
+ return false;
+}
+
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
+ || tmp->getOpcode() == AMDGPU::ENDIF
+ || tmp->getOpcode() == AMDGPU::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+ // If we have less than 16 loads in a row, and the offsets are within 16,
+ // then schedule together.
+ // TODO: Make the loads schedule near if it fits in a cacheline
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
new file mode 100644
index 0000000000..cb97af9831
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -0,0 +1,148 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Contains the definition of a TargetInstrInfo class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H
+#define AMDGPUINSTRUCTIONINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ TargetMachine &TM;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+public:
+ explicit AMDGPUInstrInfo(TargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const = 0;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+
+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const = 0;
+ virtual unsigned getIEQOpcode() const = 0;
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#endif // AMDGPUINSTRINFO_H
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
new file mode 100644
index 0000000000..96368e8541
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -0,0 +1,74 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node defintions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c)
+//
+// Can be used to optimize rtol:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
+// This argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a snd b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division, it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// e is rounding error
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+
+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
new file mode 100644
index 0000000000..e634d20b61
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -0,0 +1,190 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bits<16> AMDILOp = 0;
+ field bits<3> Gen = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+ let TSFlags{42-40} = Gen;
+ let TSFlags{63-48} = AMDILOp;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOEQ: case ISD::SETUEQ:
+ case ISD::SETEQ: return true;}}}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETONE: case ISD::SETUNE:
+ case ISD::SETNE: return true;}}}]
+>;
+def COND_GT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGT: case ISD::SETUGT:
+ case ISD::SETGT: return true;}}}]
+>;
+
+def COND_GE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGE: case ISD::SETUGE:
+ case ISD::SETGE: return true;}}}]
+>;
+
+def COND_LT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLT: case ISD::SETULT:
+ case ISD::SETLT: return true;}}}]
+>;
+
+def COND_LE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLE: case ISD::SETULE:
+ case ISD::SETLE: return true;}}}]
+>;
+
+//===----------------------------------------------------------------------===//
+// Load/Store Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+class Constants {
+int TWO_PI = 0x40c90fdb;
+int PI = 0x40490fdb;
+int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+def SHADER_TYPE : AMDGPUShaderInst <
+ (outs),
+ (ins i32imm:$type),
+ "SHADER_TYPE $type",
+ [(int_AMDGPU_shader_type imm:$type)]
+>;
+
+} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (fpow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
+
+// Vector Build pattern
+class Vector_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
+ (elemType elemClass:$z), (elemType elemClass:$w))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
+ elemClass:$z, sel_z), elemClass:$w, sel_w)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
new file mode 100644
index 0000000000..2ba2d4b90d
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -0,0 +1,62 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
new file mode 100644
index 0000000000..1dc1c657df
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -0,0 +1,83 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
+ Ctx(ctx)
+{ }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_FPImmediate: {
+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
+ "Only floating point immediates are supported at the moment.");
+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AMDGPUMCInstLower MCInstLowering(OutContext);
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ MCInst MCBundleInst;
+ const MachineInstr *BundledInst = I;
+ MCInstLowering.lower(BundledInst, MCBundleInst);
+ OutStreamer.EmitInstruction(MCBundleInst);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+}
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
new file mode 100644
index 0000000000..d7d538e925
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -0,0 +1,34 @@
+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_MCINSTLOWER_H
+#define AMDGPU_MCINSTLOWER_H
+
+namespace llvm {
+
+class MCInst;
+class MCContext;
+class MachineInstr;
+
+class AMDGPUMCInstLower {
+
+ MCContext &Ctx;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx);
+
+ /// \brief Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif //AMDGPU_MCINSTLOWER_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
new file mode 100644
index 0000000000..eeafec898d
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -0,0 +1,51 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPUGenRegisterInfo(0),
+ TM(tm),
+ TII(tii)
+ { }
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom used feature of GPUS, so
+// they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ RegScavenger *RS) const {
+ assert(!"Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(!"Subroutines not supported yet");
+ return 0;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
new file mode 100644
index 0000000000..76ee7ae06a
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -0,0 +1,63 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H
+#define AMDGPUREGISTERINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
+ TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ static const uint16_t CalleeSavedReg;
+
+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns The ISA reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * RC) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ RegScavenger *RS) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td
new file mode 100644
index 0000000000..8181e023aa
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -0,0 +1,22 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+ def sel_x : SubRegIndex;
+ def sel_y : SubRegIndex;
+ def sel_z : SubRegIndex;
+ def sel_w : SubRegIndex;
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
new file mode 100644
index 0000000000..8295efdd4b
--- /dev/null
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -0,0 +1,714 @@
+//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The pass implemented in this file transforms the programs control flow
+/// graph into a form that's suitable for code generation on hardware that
+/// implements control flow by execution masking. This currently includes all
+/// AMD GPUs but may as well be useful for other types of hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+typedef ArrayRef<BasicBlock*> BBVecRef;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
+
+// The name for newly created blocks.
+
+static const char *FlowBlockName = "Flow";
+
+/// @brief Transforms the control flow graph on one single entry/exit region
+/// at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/
+/// 3
+/// || Where:
+/// | | 1 = "If" block, calculates the condition
+/// 4 | 2 = "Then" subregion, runs if the condition is true
+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/ 4 = "Else" optional subregion, runs if the condition is false
+/// 5 5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incomming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// Additionally to that even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^ Where:
+/// | / 1 = "Entry" block
+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3 3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consist of a network of PHI nodes where the true incoming values expresses
+/// breaks and the false values expresses continue states.
+class AMDGPUStructurizeCFG : public RegionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+
+ Function *Func;
+ Region *ParentRegion;
+
+ DominatorTree *DT;
+
+ RNVector Order;
+ VisitedMap Visited;
+ PredMap Predicates;
+ BBPhiMap DeletedPhis;
+ BBVector FlowsInserted;
+
+ BasicBlock *LoopStart;
+ BasicBlock *LoopEnd;
+ BBPredicates LoopPred;
+
+ void orderNodes();
+
+ void buildPredicate(BranchInst *Term, unsigned Idx,
+ BBPredicates &Pred, bool Invert);
+
+ void analyzeBlock(BasicBlock *BB);
+
+ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
+
+ void collectInfos();
+
+ bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
+
+ void killTerminator(BasicBlock *BB);
+
+ RegionNode *skipChained(RegionNode *Node);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ BasicBlock *getNextFlow(BasicBlock *Prev);
+
+ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
+
+ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
+
+ void createFlow();
+
+ void insertConditions();
+
+ void rebuildSSA();
+
+public:
+ AMDGPUStructurizeCFG():
+ RegionPass(ID) {
+
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU simplify control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ RegionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char AMDGPUStructurizeCFG::ID = 0;
+
+/// \brief Initialize the types and constants used in the pass
+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ return false;
+}
+
+/// \brief Build up the general order of nodes
+void AMDGPUStructurizeCFG::orderNodes() {
+ scc_iterator<Region *> I = scc_begin(ParentRegion),
+ E = scc_end(ParentRegion);
+ for (Order.clear(); I != E; ++I) {
+ std::vector<RegionNode *> &Nodes = *I;
+ Order.append(Nodes.begin(), Nodes.end());
+ }
+}
+
+/// \brief Build blocks and loop predicates
+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
+ BBPredicates &Pred, bool Invert) {
+ Value *True = Invert ? BoolFalse : BoolTrue;
+ Value *False = Invert ? BoolTrue : BoolFalse;
+
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = Term->getParent();
+
+ // Handle the case where multiple regions start at the same block
+ Region *R = BB != ParentRegion->getEntry() ?
+ RI->getRegionFor(BB) : ParentRegion;
+
+ if (R == ParentRegion) {
+ // It's a top level block in our region
+ Value *Cond = True;
+ if (Term->isConditional()) {
+ BasicBlock *Other = Term->getSuccessor(!Idx);
+
+ if (Visited.count(Other)) {
+ if (!Pred.count(Other))
+ Pred[Other] = False;
+
+ if (!Pred.count(BB))
+ Pred[BB] = True;
+ return;
+ }
+ Cond = Term->getCondition();
+
+ if (Idx != Invert)
+ Cond = BinaryOperator::CreateNot(Cond, "", Term);
+ }
+
+ Pred[BB] = Cond;
+
+ } else if (ParentRegion->contains(R)) {
+ // It's a block in a sub region
+ while(R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ Pred[R->getEntry()] = True;
+
+ } else {
+ // It's a branch from outside into our parent region
+ Pred[BB] = True;
+ }
+}
+
+/// \brief Analyze the successors of each block and build up predicates
+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ BBPredicates &Pred = Predicates[BB];
+
+ for (; PI != PE; ++PI) {
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+ buildPredicate(Term, i, Pred, false);
+ }
+ }
+}
+
+/// \brief Analyze the conditions leading to loop to a previous block
+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+
+ // Ignore it if it's not a back edge
+ if (!Visited.count(Succ))
+ continue;
+
+ buildPredicate(Term, i, LoopPred, true);
+
+ LoopEnd = BB;
+ if (Visited[Succ] < LoopIdx) {
+ LoopIdx = Visited[Succ];
+ LoopStart = Succ;
+ }
+ }
+}
+
+/// \brief Collect various loop and predicate infos
+void AMDGPUStructurizeCFG::collectInfos() {
+ unsigned Number = 0, LoopIdx = ~0;
+
+ // Reset predicate
+ Predicates.clear();
+
+ // and loop infos
+ LoopStart = LoopEnd = 0;
+ LoopPred.clear();
+
+ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
+
+ // Analyze all the conditions leading to a node
+ analyzeBlock((*OI)->getEntry());
+
+ if ((*OI)->isSubRegion())
+ continue;
+
+ // Find the first/last loop nodes and loop predicates
+ analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
+ }
+}
+
+/// \brief Does A dominate all the predicates of B ?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
+ BBPredicates &Preds = Predicates[B];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(A, PI->first))
+ return false;
+ }
+ return true;
+}
+
+/// \brief Remove phi values from all successors and the remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// First: Skip forward to the first region node that either isn't a subregion or not
+/// dominating it's exit, remove all the skipped nodes from the node order.
+///
+/// Second: Handle the first successor directly if the resulting nodes successor
+/// predicates are still dominated by the original entry
+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
+ BasicBlock *Entry = Node->getEntry();
+
+ // Skip forward as long as it is just a linear flow
+ while (true) {
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Exit;
+
+ if (Node->isSubRegion()) {
+ Exit = Node->getNodeAs<Region>()->getExit();
+ } else {
+ TerminatorInst *Term = Entry->getTerminator();
+ if (Term->getNumSuccessors() != 1)
+ break;
+ Exit = Term->getSuccessor(0);
+ }
+
+ // It's a back edge, break here so we can insert a loop node
+ if (!Visited.count(Exit))
+ return Node;
+
+ // More than node edges are pointing to exit
+ if (!DT->dominates(Entry, Exit))
+ return Node;
+
+ RegionNode *Next = ParentRegion->getNode(Exit);
+ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
+ assert(I != Order.end());
+
+ Visited.erase(Next->getEntry());
+ Order.erase(I);
+ Node = Next;
+ }
+
+ BasicBlock *BB = Node->getEntry();
+ TerminatorInst *Term = BB->getTerminator();
+ if (Term->getNumSuccessors() != 2)
+ return Node;
+
+ // Our node has exactly two succesors, check if we can handle
+ // any of them directly
+ BasicBlock *Succ = Term->getSuccessor(0);
+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
+ Succ = Term->getSuccessor(1);
+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
+ return Node;
+ } else {
+ BasicBlock *Succ2 = Term->getSuccessor(1);
+ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
+ dominatesPredicates(Entry, Succ2))
+ Succ = Succ2;
+ }
+
+ RegionNode *Next = ParentRegion->getNode(Succ);
+ RNVector::iterator E = Order.end();
+ RNVector::iterator I = std::find(Order.begin(), E, Next);
+ assert(I != E);
+
+ killTerminator(BB);
+ FlowsInserted.push_back(BB);
+ Visited.erase(Succ);
+ Order.erase(I);
+ return ParentRegion->getNode(wireFlowBlock(BB, Next));
+}
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+ PhiMap &Map = DeletedPhis[To];
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ while (Phi.getBasicBlockIndex(From) != -1) {
+ Value *Deleted = Phi.removeIncomingValue(From, false);
+ Map[&Phi].push_back(std::make_pair(From, Deleted));
+ }
+ }
+}
+
+/// \brief Add the PHI values back once we knew the new predecessor
+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+ if (!DeletedPhis.count(To))
+ return;
+
+ PhiMap &Map = DeletedPhis[To];
+ SSAUpdater Updater;
+
+ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+
+ PHINode *Phi = I->first;
+ Updater.Initialize(Phi->getType(), "");
+ BasicBlock *Fallback = To;
+ bool HaveFallback = false;
+
+ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
+ VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
+ if (Dom == VI->first)
+ HaveFallback = true;
+ else if (Dom != Fallback)
+ HaveFallback = false;
+ Fallback = Dom;
+ }
+ if (!HaveFallback) {
+ Value *Undef = UndefValue::get(Phi->getType());
+ Updater.AddAvailableValue(Fallback, Undef);
+ }
+
+ Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
+ }
+ DeletedPhis.erase(To);
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
+ LLVMContext &Context = Func->getContext();
+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+ Order.back()->getEntry();
+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+ Func, Insert);
+ DT->addNewBlock(Flow, Prev);
+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+ FlowsInserted.push_back(Flow);
+ return Flow;
+}
+
+/// \brief Can we predict that this node will always be called?
+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
+ BasicBlock *Node) {
+ BBPredicates &Preds = Predicates[Node];
+ bool Dominated = false;
+
+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+
+ if (I->second != BoolTrue)
+ return false;
+
+ if (!Dominated && DT->dominates(I->first, Prev))
+ Dominated = true;
+ }
+ return Dominated;
+}
+
+/// \brief Wire up the new control flow by inserting or updating the branch
+/// instructions at node exits
+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
+ RegionNode *Node) {
+ BasicBlock *Entry = Node->getEntry();
+
+ if (LoopStart == Entry) {
+ LoopStart = Prev;
+ LoopPred[Prev] = BoolTrue;
+ }
+
+ // Wire it up temporary, skipChained may recurse into us
+ BranchInst::Create(Entry, Prev);
+ DT->changeImmediateDominator(Entry, Prev);
+ addPhiValues(Prev, Entry);
+
+ Node = skipChained(Node);
+
+ BasicBlock *Next = getNextFlow(Prev);
+ if (!isPredictableTrue(Prev, Entry)) {
+ // Let Prev point to entry and next block
+ Prev->getTerminator()->eraseFromParent();
+ BranchInst::Create(Entry, Next, BoolUndef, Prev);
+ } else {
+ DT->changeImmediateDominator(Next, Entry);
+ }
+
+ // Let node exit(s) point to next block
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *Exit = SubRegion->getExit();
+
+ // Find all the edges from the sub region to the exit
+ BBVector ToDo;
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ if (SubRegion->contains(*I))
+ ToDo.push_back(*I);
+ }
+
+ // Modify the edges to point to the new flow block
+ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
+ delPhiValues(*I, Exit);
+ TerminatorInst *Term = (*I)->getTerminator();
+ Term->replaceUsesOfWith(Exit, Next);
+ }
+
+ // Update the region info
+ SubRegion->replaceExit(Next);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(Next, BB);
+
+ if (BB == LoopEnd)
+ LoopEnd = 0;
+ }
+
+ return Next;
+}
+
+/// Destroy node order and visited map, build up flow order instead.
+/// After this function control flow looks like it should be, but
+/// branches only have undefined conditions.
+void AMDGPUStructurizeCFG::createFlow() {
+ DeletedPhis.clear();
+
+ BasicBlock *Prev = Order.pop_back_val()->getEntry();
+ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
+ Visited.erase(Prev);
+
+ if (LoopStart == Prev) {
+ // Loop starts at entry, split entry so that we can predicate it
+ BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
+ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
+ DT->addNewBlock(Split, Prev);
+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
+ Predicates[Split] = Predicates[Prev];
+ Order.push_back(ParentRegion->getBBNode(Split));
+ LoopPred[Prev] = BoolTrue;
+
+ } else if (LoopStart == Order.back()->getEntry()) {
+ // Loop starts behind entry, split entry so that we can jump to it
+ Instruction *Term = Prev->getTerminator();
+ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
+ DT->addNewBlock(Split, Prev);
+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
+ Prev = Split;
+ }
+
+ killTerminator(Prev);
+ FlowsInserted.clear();
+ FlowsInserted.push_back(Prev);
+
+ while (!Order.empty()) {
+ RegionNode *Node = Order.pop_back_val();
+ Visited.erase(Node->getEntry());
+ Prev = wireFlowBlock(Prev, Node);
+ if (LoopStart && !LoopEnd) {
+ // Create an extra loop end node
+ LoopEnd = Prev;
+ Prev = getNextFlow(LoopEnd);
+ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
+ addPhiValues(LoopEnd, LoopStart);
+ }
+ }
+
+ BasicBlock *Exit = ParentRegion->getExit();
+ BranchInst::Create(Exit, Prev);
+ addPhiValues(Prev, Exit);
+ if (DT->dominates(ParentRegion->getEntry(), Exit))
+ DT->changeImmediateDominator(Exit, Prev);
+
+ if (LoopStart && LoopEnd) {
+ BBVector::iterator FI = std::find(FlowsInserted.begin(),
+ FlowsInserted.end(),
+ LoopStart);
+ for (; *FI != LoopEnd; ++FI) {
+ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
+ }
+ }
+
+ assert(Order.empty());
+ assert(Visited.empty());
+ assert(DeletedPhis.empty());
+}
+
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions() {
+ SSAUpdater PhiInserter;
+
+ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
+ FI != FE; ++FI) {
+
+ BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
+ if (Term->isUnconditional())
+ continue;
+
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
+
+ BasicBlock *Succ = Term->getSuccessor(0);
+ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ }
+
+ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
+ }
+}
+
+/// Handle a rare case where the disintegrated nodes instructions
+/// no longer dominate all their uses. Not sure if this is really nessasary
+void AMDGPUStructurizeCFG::rebuildSSA() {
+ SSAUpdater Updater;
+ for (Region::block_iterator I = ParentRegion->block_begin(),
+ E = ParentRegion->block_end();
+ I != E; ++I) {
+
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ bool Initialized = false;
+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+ Next = I->getNext();
+
+ Instruction *User = cast<Instruction>(I->getUser());
+ if (User->getParent() == BB) {
+ continue;
+
+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(*I) == BB)
+ continue;
+ }
+
+ if (DT->dominates(II, User))
+ continue;
+
+ if (!Initialized) {
+ Value *Undef = UndefValue::get(II->getType());
+ Updater.Initialize(II->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(BB, II);
+ Initialized = true;
+ }
+ Updater.RewriteUseAfterInsertions(*I);
+ }
+ }
+ }
+}
+
+/// \brief Run the transformation for each region found
+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ Func = R->getEntry()->getParent();
+ ParentRegion = R;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ orderNodes();
+ collectInfos();
+ createFlow();
+ insertConditions();
+ rebuildSSA();
+
+ Order.clear();
+ Visited.clear();
+ Predicates.clear();
+ DeletedPhis.clear();
+ FlowsInserted.clear();
+
+ return true;
+}
+
+/// \brief Create the pass
+Pass *llvm::createAMDGPUStructurizeCFGPass() {
+ return new AMDGPUStructurizeCFG();
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
new file mode 100644
index 0000000000..0f356a1c3f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
+ InstrItins = getInstrItineraryForCPU(CPU);
+
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ StringRef GPU = CPU;
+ Is64bit = false;
+ DefaultSize[0] = 64;
+ DefaultSize[1] = 1;
+ DefaultSize[2] = 1;
+ ParseSubtargetFeatures(GPU, FS);
+ DevName = GPU;
+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
+}
+
+AMDGPUSubtarget::~AMDGPUSubtarget() {
+ delete Device;
+}
+
+bool
+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDGPUSubtarget::is64bit() const {
+ return Is64bit;
+}
+bool
+AMDGPUSubtarget::isTargetELF() const {
+ return false;
+}
+size_t
+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
+ if (dim > 3) {
+ return 1;
+ } else {
+ return DefaultSize[dim];
+ }
+}
+
+std::string
+AMDGPUSubtarget::getDataLayout() const {
+ if (!Device) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return Device->getDataLayout();
+}
+
+std::string
+AMDGPUSubtarget::getDeviceName() const {
+ return DevName;
+}
+const AMDGPUDevice *
+AMDGPUSubtarget::device() const {
+ return Device;
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
new file mode 100644
index 0000000000..1973fc6d54
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -0,0 +1,65 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUSUBTARGET_H
+#define AMDGPUSUBTARGET_H
+#include "AMDILDevice.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+private:
+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
+ const AMDGPUDevice *Device;
+ size_t DefaultSize[3];
+ std::string DevName;
+ bool Is64bit;
+ bool Is32on64bit;
+ bool DumpCode;
+ bool R600ALUInst;
+
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual ~AMDGPUSubtarget();
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ const AMDGPUDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ bool dumpCode() const { return DumpCode; }
+ bool r600ALUEncoding() const { return R600ALUInst; }
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUSUBTARGET_H
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
new file mode 100644
index 0000000000..d09dc2efff
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -0,0 +1,142 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU target machine contains all of the hardware specific
+/// information needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include <llvm/CodeGen/Passes.h>
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeR600Target() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ Layout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUStructurizeCFGPass());
+ addPass(createSIAnnotateControlFlowPass());
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUPeepholeOpt(*TM));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIAssignInterpRegsPass(*TM));
+ }
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+
+ addPass(&IfConverterID);
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUCFGPreparationPass(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(&FinalizeMachineBundlesID);
+ } else {
+ addPass(createSILowerLiteralConstantsPass(*TM));
+ addPass(createSILowerControlFlowPass(*TM));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
new file mode 100644
index 0000000000..91f9a83a29
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -0,0 +1,70 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU TargetMachine interface definition for hw codgen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILFrameLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const DataLayout Layout;
+ AMDGPUFrameLowering FrameLowering;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ const InstrItineraryData* InstrItins;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const DataLayout* getDataLayout() const { return &Layout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
new file mode 100644
index 0000000000..4e577dc234
--- /dev/null
+++ b/lib/Target/R600/AMDIL.h
@@ -0,0 +1,106 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// This file contains the entry points for global functions defined in the LLVM
+/// AMDGPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H
+#define AMDIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// Instruction selection passes.
+FunctionPass*
+ createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+// Pre emit passes.
+FunctionPass*
+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a seperate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ REGION_ADDRESS = 4, ///< Address space for region memory.
+ ADDRESS_NONE = 5, ///< Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+ USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
+ LAST_ADDRESS = 9
+};
+
+} // namespace AMDGPUAS
+
+} // end namespace llvm
+#endif // AMDIL_H
diff --git a/lib/Target/R600/AMDIL7XXDevice.cpp b/lib/Target/R600/AMDIL7XXDevice.cpp
new file mode 100644
index 0000000000..ea6ac34f57
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.cpp
@@ -0,0 +1,115 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ DeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ DeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ DeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
+}
+
+void AMDGPU7XXDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDGPU7XXDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDGPU7XXDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
+ return 1;
+}
+
+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
+ setCaps();
+}
+
+AMDGPU770Device::~AMDGPU770Device() {
+}
+
+void AMDGPU770Device::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU770Device::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
+}
+
+AMDGPU710Device::~AMDGPU710Device() {
+}
+
+size_t AMDGPU710Device::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDIL7XXDevice.h b/lib/Target/R600/AMDIL7XXDevice.h
new file mode 100644
index 0000000000..1cf4ca415a
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.h
@@ -0,0 +1,72 @@
+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDIL7XXDEVICEIMPL_H
+#define AMDIL7XXDEVICEIMPL_H
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
+///
+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
+/// support the minimal features that are required to be considered OpenCL 1.0
+/// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU770Device class represents the RV770 chip and it's
+/// derivative cards.
+///
+/// The difference between this device and the base class is this device device
+/// adds support for double precision and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU770Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU710Device class derives from the 7XX base class.
+///
+/// This class is a smaller derivative, so we need to overload some of the
+/// functions in order to correctly specify this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU710Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU710Device();
+ virtual size_t getWavefrontSize() const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
new file mode 100644
index 0000000000..c12cedcf7f
--- /dev/null
+++ b/lib/Target/R600/AMDILBase.td
@@ -0,0 +1,85 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDGPUDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "Is64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "Is32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding.">;
+
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILInstrInfo.td"
+
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
new file mode 100644
index 0000000000..aa8ab6bd93
--- /dev/null
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3049 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct {
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
+ //WHILELOOP(thisloop) init before entering
+ //thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
+ //WHILELOOP(thisloop) init after entering
+ //thisloop.
+ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
+ //land block, branch cond on this reg.
+ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
+ //endif" after ENDLOOP(thisloop) break
+ //outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
+ //endif" after ENDLOOP(thisloop) continue on
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer {
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+private:
+ void reversePredicateSetter(typename BlockT::iterator);
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDGPURegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::prepare\n";
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::run\n";
+ func.viewCFG();
+ }
+
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ }
+
+ if (!finish) {
+ assert(!"IRREDUCIBL_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ //walk through all the block in func to check for unreachable
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ }
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+ // new BB created for landBlk==NULL may introduce new challenge to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loop outside->inside, so "continue" to a outside loop won't
+ // be mistaken as "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+
+ // TODO: Simplify, maybe separate function?
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
+ // same loop with LoopLandInfo without explicitly keeping track of
+ // loopContBlks and loopBreakBlks, this is a method to get the information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0);
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk and falseBlk, migrate the rest that
+ // have more than one predecessors. without doing this, its predecessor
+ // rather than headBlk will have undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the about if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_PREDICATE_SET, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the
+ * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
+ * search for the DebugLoc in the that statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
+ * search for the DebugLoc in the continue statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
+ // loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
+ // loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ } else {
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ branchInstr->eraseFromParent();
+ } else {
+ // if we've arrived here then we've already erased the branch instruction
+ // travel back up the basic block to see the last reference of our debug location
+ // we've just inserted that reference here so it should be representative
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// BBs in exitBlkSet are determined as in break-path for loopRep,
+// before we can put code for BBs as inside loop-body for loopRep
+// check whether those BBs are determined as cont-BB for parentLoopRep
+// earlier.
+// If so, generate a new BB newBlk
+// (1) set newBlk common successor of BBs in exitBlkSet
+// (2) change the continue-instr in BBs in exitBlkSet to break-instr
+// (3) generate continue-instr in newBlk
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branch on the loop endBranchInit register to the
+// pathes corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two pathes) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
+// last block in the path Otherwise, return NULL
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDGPU branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit change
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and return the newly added dummy exit block
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder MIB(*funcRep, newInstr);
+ MIB.addMBB(loopHeader);
+ MIB.addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // I saw two unconditional branch in one basic block in example
+ // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is work around solution for findNearestCommonDominator not avaiable to
+// post dom a proper fix should go to Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDGPU
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+public:
+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+
+private:
+
+};
+
+} //end of namespace llvm
+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
+}
+
+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPrepare(TargetMachine &tm);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm ) {
+}
+const char *AMDGPUCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPerform(TargetMachine &tm);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPerform::ID = 0;
+} //end of namespace llvm
+
+ AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
+: AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDGPUCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// this class is tailor to the AMDGPU backend
+template<>
+struct CFGStructTraits<AMDGPUCFGStructurizer> {
+ typedef int RegiT;
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+ //get DebugLoc from the first MachineBasicBlock instruction with debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ //
+ // BB with backward-edge could have move instructions after the branch
+ // instruction. Such move instruction "belong to" the loop backward-edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!TII->isMov(instr->getOpcode())) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ MachineBasicBlock::iterator res;
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(oldInstr->getOperand(1).getReg(), false);
+
+ SHOWNEWINSTR(newInstr);
+ //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ } //insertInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(dstReg, RegState::Define); //set target
+ MIB.addReg(src1Reg); //set src value
+ MIB.addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+ //the AMDGPU instruction is not recognized as terminator fix this and retire
+ //this routine
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getTrueBranch(branchInstr) == oldBlk) {
+ setTrueBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+ // TODO to fix up jump table so later phase won't be confused. if
+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+ // there isn't such an interface yet. alternatively, replace all the other
+ // blocks in the jump table with the entryBlk //}
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDGPUCFGPreparationPass- Returns a pass
+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
+ ) {
+ return new AMDGPUCFGPrepare(tm );
+}
+
+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDGPUCFGStructurizerPass- Returns a pass
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
+ ) {
+ return new AMDGPUCFGPerform(tm );
+}
+
+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp
new file mode 100644
index 0000000000..eec5059de2
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.cpp
@@ -0,0 +1,124 @@
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ DeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDGPUDevice::~AMDGPUDevice() {
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDGPUDevice::getMaxGDSSize() const {
+ return 0;
+}
+
+uint32_t
+AMDGPUDevice::getDeviceFlag() const {
+ return DeviceFlag;
+}
+
+size_t AMDGPUDevice::getMaxNumCBs() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxCBSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxScratchSize() const {
+ return 65536;
+}
+
+uint32_t AMDGPUDevice::getStackAlignment() const {
+ return 16;
+}
+
+void AMDGPUDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+}
+
+AMDGPUDeviceInfo::ExecutionMode
+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Software;
+ }
+
+ return AMDGPUDeviceInfo::Unsupported;
+
+}
+
+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
+}
+
+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
+}
+
+std::string
+AMDGPUDevice::getDataLayout() const {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h
new file mode 100644
index 0000000000..97df98cafb
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.h
@@ -0,0 +1,117 @@
+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+//
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEIMPL_H
+#define AMDILDEVICEIMPL_H
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDGPUDevice {
+public:
+ AMDGPUDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ /// \returns The max LDS size that the hardware supports. Size is in
+ /// bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ /// \returns The max GDS size that the hardware supports if the GDS is
+ /// supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ /// \returns The max number of hardware constant address spaces that
+ /// are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ /// \returns The max number of bytes a single hardware constant buffer
+ /// can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ /// \returns The max number of bytes allowed by the hardware scratch
+ /// buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ /// \brief Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ /// \returns The number of work-items that exist in a single hardware
+ /// wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ /// \brief Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ /// \brief Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ /// \brief Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ /// \brief Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returned true. If usesHardware returned true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the softare. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it with possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+
+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDGPUSubtarget *mSTM;
+ uint32_t DeviceFlag;
+private:
+ AMDGPUDeviceInfo::ExecutionMode
+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp
new file mode 100644
index 0000000000..9605fbe633
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDGPUDeviceInfo {
+
+AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
+ AMDGPUSubtarget *ptr,
+ bool is64bit, bool is64on32bit) {
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDGPU710Device(ptr);
+ case '7':
+ return new AMDGPU770Device(ptr);
+ default:
+ return new AMDGPU7XXDevice(ptr);
+ }
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPURedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCedarDevice(ptr);
+ } else if (deviceName == "barts" || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDGPUSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPU7XXDevice(ptr);
+ }
+}
+} // End namespace AMDGPUDeviceInfo
+} // End namespace llvm
diff --git a/lib/Target/R600/AMDILDeviceInfo.h b/lib/Target/R600/AMDILDeviceInfo.h
new file mode 100644
index 0000000000..4b2c3a53c7
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.h
@@ -0,0 +1,88 @@
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEINFO_H
+#define AMDILDEVICEINFO_H
+
+
+#include <string>
+
+namespace llvm {
+ class AMDGPUDevice;
+ class AMDGPUSubtarget;
+ namespace AMDGPUDeviceInfo {
+ /// Each Capabilities can be executed using a hardware instruction,
+ /// emulated with a sequence of software instructions, or not
+ /// supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, ///< Unsupported feature on the card(Default value)
+ /// This is the execution mode that is set if the feature is emulated in
+ /// software.
+ Software,
+ /// This execution mode is set if the feature exists natively in hardware
+ Hardware
+ };
+
+ enum Caps {
+ HalfOps = 0x1, ///< Half float is supported or not.
+ DoubleOps = 0x2, ///< Double is supported or not.
+ ByteOps = 0x3, ///< Byte(char) is support or not.
+ ShortOps = 0x4, ///< Short is supported or not.
+ LongOps = 0x5, ///< Long is supported or not.
+ Images = 0x6, ///< Images are supported or not.
+ ByteStores = 0x7, ///< ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, ///< Constant/CB memory.
+ LocalMem = 0x9, ///< Local/LDS memory.
+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
+ RegionMem = 0xB, ///< OCL GDS Memory Extension.
+ FMA = 0xC, ///< Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, ///< ReservedFlag
+ NoAlias = 0x10, ///< Cached loads.
+ Signed24BitOps = 0x11, ///< Peephole Optimization.
+ /// Debug mode implies that no hardware features or optimizations
+ /// are performned and that all memory access go through a single
+ /// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12,
+ CachedMem = 0x13, ///< Cached mem is available or not.
+ BarrierDetect = 0x14, ///< Detect duplicate barriers.
+ Reserved1 = 0x15, ///< Reserved flag
+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
+ NoInline = 0x19, ///< Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
+ /// If more capabilities are required, then
+ /// this number needs to be increased.
+ /// All capabilities must come before this
+ /// number.
+ MaxNumberCapabilities = 0x20
+ };
+ /// These have to be in order with the older generations
+ /// having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, ///< 7XX based devices.
+ HD5XXX, ///< Evergreen based devices.
+ HD6XXX, ///< NI/Evergreen+ based devices.
+ HD7XXX, ///< Southern Islands based devices.
+ HDTEST, ///< Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDGPUDevice*
+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
+ bool is64bit = false, bool is64on32bit = false);
+ } // namespace AMDILDeviceInfo
+} // namespace llvm
+#endif // AMDILDEVICEINFO_H
diff --git a/lib/Target/R600/AMDILDevices.h b/lib/Target/R600/AMDILDevices.h
new file mode 100644
index 0000000000..636fa6d359
--- /dev/null
+++ b/lib/Target/R600/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDIL_DEVICES_H
+#define AMDIL_DEVICES_H
+// Include all of the device specific header files
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // AMDIL_DEVICES_H
diff --git a/lib/Target/R600/AMDILEvergreenDevice.cpp b/lib/Target/R600/AMDILEvergreenDevice.cpp
new file mode 100644
index 0000000000..c5213a0410
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.cpp
@@ -0,0 +1,169 @@
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
+: AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ DeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ DeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ DeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ DeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
+}
+
+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD5XXX;
+}
+
+void AMDGPUEvergreenDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Images);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
+}
+
+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
+}
+
+void AMDGPUCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+}
+
+
+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
+}
+
+void AMDGPUCedarDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPUCedarDevice::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
+
+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
+}
+
+void AMDGPURedwoodDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDILEvergreenDevice.h b/lib/Target/R600/AMDILEvergreenDevice.h
new file mode 100644
index 0000000000..ea90f774a8
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.h
@@ -0,0 +1,93 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILEVERGREENDEVICE_H
+#define AMDILEVERGREENDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
+/// series of cards.
+///
+/// This class contains information required to differentiate
+/// the Evergreen device from the generic AMDGPUDevice. This device represents
+/// that capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDGPUEvergreenDevice : public AMDGPUDevice {
+public:
+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+};
+
+/// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has
+/// support for double precision operations. This device is used to represent
+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+/// and HD59XX cards.
+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCypressDevice();
+private:
+ virtual void setCaps();
+};
+
+
+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
+/// devices.
+///
+/// This class differs from the base AMDGPUEvergreenDevice in that the
+/// device is a ~quarter of the 'Juniper'. These are commercially known as the
+/// HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based
+/// devices.
+///
+/// This class differs from the base class, in that these devices are
+/// considered about half of a 'Juniper' device. These are commercially known as
+/// the HD55XX and HD56XX series of cards.
+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPURedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+} // namespace llvm
+#endif // AMDILEVERGREENDEVICE_H
diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp
new file mode 100644
index 0000000000..9ad495ab48
--- /dev/null
+++ b/lib/Target/R600/AMDILFrameLowering.cpp
@@ -0,0 +1,47 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on a AMDGPU target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) {
+}
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() {
+}
+
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
+}
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDILFrameLowering.h
new file mode 100644
index 0000000000..51337c3dd2
--- /dev/null
+++ b/lib/Target/R600/AMDILFrameLowering.h
@@ -0,0 +1,40 @@
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on a AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILFRAME_LOWERING_H
+#define AMDILFRAME_LOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // AMDILFRAME_LOWERING_H
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
new file mode 100644
index 0000000000..d15ed393c1
--- /dev/null
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,485 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "R600InstrInfo.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget &Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isParamLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into a AMDGPU-specific
+// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
+ ) {
+ return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
+ )
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::FrameIndex: {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ unsigned int FI = FIN->getIndex();
+ EVT OpVT = N->getValueType(0);
+ unsigned int NewOpc = AMDGPU::COPY;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+ }
+ break;
+ }
+ case ISD::ConstantFP:
+ case ISD::Constant: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ // XXX: Custom immediate lowering not implemented yet. Instead we use
+ // pseudo instructions defined in SIInstructions.td
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+ uint64_t ImmValue = 0;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+
+ if (N->getOpcode() == ISD::ConstantFP) {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::f64);
+
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
+ APFloat Value = C->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = Value.bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::i64);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (C->getZExtValue() == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (C->getZExtValue() == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = C->getZExtValue();
+ }
+ }
+
+ for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
+ Use != SDNode::use_end(); Use = Next) {
+ Next = llvm::next(Use);
+ std::vector<SDValue> Ops;
+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
+ Ops.push_back(Use->getOperand(i));
+ }
+
+ if (!Use->isMachineOpcode()) {
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ // We can only use literal constants (e.g. AMDGPU::ZERO,
+ // AMDGPU::ONE, etc) in machine opcodes.
+ continue;
+ }
+ } else {
+ if (!TII->isALUInstr(Use->getMachineOpcode())) {
+ continue;
+ }
+
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
+ assert(ImmIdx != -1);
+
+ // subtract one from ImmIdx, because the DST operand is usually index
+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
+ assert(C);
+
+ if (C->getZExtValue() != 0) {
+ // This instruction is already using an immediate.
+ continue;
+ }
+
+ // Set the immediate value
+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
+ }
+ }
+ // Set the immediate register
+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
+
+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
+ }
+ break;
+ }
+ }
+ return SelectCode(N);
+}
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
+ if (!V) {
+ return NULL;
+ }
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ assert(!"Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+///==== AMDGPU Functions ====///
+
+bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ bool Match = false;
+
+ // Find the base ptr and the offset
+ for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
+ SDValue Arg = Addr.getOperand(i);
+ ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
+ // This arg isn't a constant so it must be the base PTR.
+ if (!OffsetNode) {
+ Base = Addr.getOperand(i);
+ continue;
+ }
+ // Check if the constant argument fits in 8-bits. The offset is in bytes
+ // so we need to convert it to dwords.
+ if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
+ Match = true;
+ Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
+ MVT::i32);
+ }
+ }
+ return Match;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode * IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+
+ return true;
+}
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 0000000000..2e60adcc99
--- /dev/null
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,651 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDGPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+ int types[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] = {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] = {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t NumTypes = sizeof(types) / sizeof(*types);
+ size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < NumTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+ // Use the default implementation.
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const {
+ return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
new file mode 100644
index 0000000000..e969bbf8ca
--- /dev/null
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -0,0 +1,208 @@
+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports double, but not double
+// precision divide in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware support 24bit signed
+// math ops. Otherwise a software expansion to 32bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is set to true if 64bit operations are supported or not
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
+// Predicate that is true if we are at least evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
+
+// Predicate that is false if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
+
+
+// Predicate that is set to true if 64bit Mul is supported in the IL or not
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDGPUDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+
+// Predicate that is set to true if 64bit pointer are used.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
+def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+// Multiclass that handles branch instructions
+multiclass BranchConditional<SDNode Op> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+}
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp
new file mode 100644
index 0000000000..4ddb057d80
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,79 @@
+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDIL.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {
+}
+
+std::string
+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const {
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ if (IntrID < Intrinsic::num_intrinsics) {
+ return 0;
+ }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned int
+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID
+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool
+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+Function*
+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
+ llvm_unreachable("Not implemented");
+}
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h
new file mode 100644
index 0000000000..35559e23fc
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.h
@@ -0,0 +1,49 @@
+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef AMDIL_INTRINSICS_H
+#define AMDIL_INTRINSICS_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+class TargetMachine;
+
+namespace AMDGPUIntrinsic {
+enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+};
+
+} // end namespace AMDGPUIntrinsic
+
+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+public:
+ AMDGPUIntrinsicInfo(TargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+};
+
+} // end namespace llvm
+
+#endif // AMDIL_INTRINSICS_H
+
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
new file mode 100644
index 0000000000..3f9e20f0c8
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -0,0 +1,242 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the amdil-specific intrinsics
+//
+//===---------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
+ TernaryIntInt;
+ def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
+ TernaryIntInt;
+ def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
+ TernaryIntFloat;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
+ TernaryIntInt;
+ def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
+ TernaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+}
diff --git a/lib/Target/R600/AMDILNIDevice.cpp b/lib/Target/R600/AMDILNIDevice.cpp
new file mode 100644
index 0000000000..47c3f7f209
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.cpp
@@ -0,0 +1,65 @@
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ DeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ DeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ DeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ DeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+AMDGPUNIDevice::~AMDGPUNIDevice() {
+}
+
+size_t
+AMDGPUNIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUNIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD6XXX;
+}
+
+
+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
+ : AMDGPUNIDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
+}
+
+void
+AMDGPUCaymanDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+}
+
diff --git a/lib/Target/R600/AMDILNIDevice.h b/lib/Target/R600/AMDILNIDevice.h
new file mode 100644
index 0000000000..24a640845e
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.h
@@ -0,0 +1,57 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILNIDEVICE_H
+#define AMDILNIDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUNIDevice is the base class for all Northern Island series of
+/// cards.
+///
+/// It is very similiar to the AMDGPUEvergreenDevice, with the major
+/// exception being differences in wavefront size and hardware capabilities. The
+/// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
+/// integer operations
+class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUNIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+};
+
+/// Just as the AMDGPUCypressDevice is the double capable version of the
+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version
+/// of the AMDGPUNIDevice. The other major difference is that the Cayman Device
+/// has 4 wide ALU's, whereas the rest of the NI family is a 5 wide.
+class AMDGPUCaymanDevice: public AMDGPUNIDevice {
+public:
+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUCaymanDevice();
+private:
+ virtual void setCaps();
+};
+
+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // AMDILNIDEVICE_H
diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 0000000000..a5f7ee5053
--- /dev/null
+++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1215 @@
+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILDevices.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+ "assigments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The Peephole optimization pass is used to do simple last minute optimizations
+// that are required for correct code or to remove redundant functions
+namespace {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+ // Quick check to see if we need to dump all of the pointers into the
+ // arena. If this is correct, then we set all pointers to exist in arena. This
+ // is a workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+ // Because I don't want to invalidate any pointers while in the
+ // safeNestedForEachFunction. I push atomic conversions to a vector and handle
+ // it later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+ // Because __amdil_is_constant cannot be properly evaluated if
+ // optimizations are disabled, the call's are placed in a vector
+ // and evaluated after the __amdil_image* functions are evaluated
+ // which should allow the __amdil_is_constant function to be
+ // evaluated correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on version of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+ // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
+ // this case we need to expand them. These functions check for 24bit functions
+ // and then expand.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that if the required workgroup size is
+ // specified then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+ // On northern island cards, the division is slightly less accurate than on
+ // previous generations, so we need to utilize a more accurate division. So we
+ // can translate the accurate divide to a normal divide on all other cards.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no opt mode, then we need to make sure that
+ // local samplers are properly propagated as constant propagation
+ // doesn't occur and we need to know the value of kernel defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ // Helper functions
+
+ // Group of functions that recursively calculate the size of a structure based
+ // on it's sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDGPUSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) {
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
+
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDGPUPeepholeOpt(TargetMachine &tm) {
+ return new AMDGPUPeepholeOpt(tm);
+ }
+} // llvm namespace
+
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
+ : FunctionPass(ID), TM(tm) {
+ mDebug = DEBUGME;
+ optLevel = TM.getOptLevel();
+
+}
+
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
+}
+
+const char *
+AMDGPUPeepholeOpt::getPassName() const {
+ return "AMDGPU PeepHole Optimization Pass";
+}
+
+bool
+containsPointerType(Type *Ty) {
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+ };
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+ if (!dyn_cast<StructType>(DereferencedType)
+ ) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+ // another pointer we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+ atomicFuncs[x].first->setOperand(
+ atomicFuncs[x].first->getNumOperands()-1,
+ atomicFuncs[x].second);
+
+ }
+ mChanged = true;
+ if (mConvertAtomics) {
+ return;
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+ mCTX = &MF.getType()->getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+ // If we do not have optimizations, then this
+ // cannot be properly evaluated, so we add the
+ // call instruction to a vector and process
+ // them at the end of processing after the
+ // samplers have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+ atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift) {
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+ // If the base is neither a Shl or a And, we don't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
+bool
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do an optimization on a sequence of ops that in the end equals a
+ // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in a intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+ // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ return false;
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ return false;
+ }
+ if (!LHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ } else if (lhsShiftVal != lhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ return false;
+ }
+ if (!RHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ } else if (rhsShiftVal != rhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+ // Lets create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+ // We want to do some simple optimizations on Shift right/And patterns. The
+ // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
+ // value smaller than 32 and C is a mask. If C is a constant value, then the
+ // following transformation can occur. For signed integers, it turns into the
+ // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
+ // integers, it turns into the function call dst =
+ // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
+ // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
+ // Evergreen hardware.
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+ // We currently cannot support more than 4 elements in a intrinsic and we
+ // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+ // If we are a shift left, then we need don't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+ // Lets make sure that the shift value and the and mask are constant integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the the number of original bits left
+ // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ // Count the number of bits set in the mask, this is the width of the
+ // resulting bit set that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+ // If the mask val is greater than the the number of original bits left then
+ // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+ // Lets create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "llvm.AMDGPU.bit.extract.u32";
+ if (isVector) {
+ name += ".v" + itostr(numEle) + "i32";
+ } else {
+ name += ".";
+ }
+ // Lets create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ ShiftInst->getOperand(0),
+ shiftValConst,
+ newMaskConst
+ };
+ // Lets create the Call with the operands
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+ // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ Ty = inst->getType();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+ if (size == alignment || size < alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ std::string namePrefix = LHS->getName().substr(0, 14);
+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
+ && namePrefix != "__amdil__imul24_high") {
+ return false;
+ }
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+ // On 7XX and 8XX we do not have signed 24bit, so we need to
+ // expand it to the following:
+ // imul24 turns into 32bit imul
+ // imad24 turns into 32bit imad
+ // imul24_high turns into 32bit imulhigh
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+ unsigned funcNameIdx = 0;
+ funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+ unsigned samplerIdx = 2;
+ samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are loading from what is not a global value, then we
+ // fail and return.
+ if (!gv) {
+ return false;
+ }
+
+ // If we don't have an initializer or we have an initializer and
+ // the initializer is not a 32bit integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+ // Now that we have the global variable initializer, lets replace
+ // all uses of the load instruction with the samplerVal and
+ // reparse the __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::doInitialization(Module &M) {
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::doFinalization(Module &M) {
+ return false;
+}
+
+void
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+ };
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+ bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+ bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+ bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+ bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+ bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td
new file mode 100644
index 0000000000..b9d033432e
--- /dev/null
+++ b/lib/Target/R600/AMDILRegisterInfo.td
@@ -0,0 +1,107 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+class AMDILReg<bits<16> num, string n> : Register<n> {
+ field bits<16> Value;
+ let Value = num;
+ let Namespace = "AMDGPU";
+}
+
+// We will start with 8 registers for each class before expanding to more
+// Since the swizzle is added based on the register class, we can leave it
+// off here and just specify different registers for different register classes
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless commented in this section
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp
new file mode 100644
index 0000000000..3096c22067
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.cpp
@@ -0,0 +1,45 @@
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+
+using namespace llvm;
+
+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+}
+AMDGPUSIDevice::~AMDGPUSIDevice() {
+}
+
+size_t
+AMDGPUSIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUSIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDGPUSIDevice::getDataLayout() const {
+ return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
diff --git a/lib/Target/R600/AMDILSIDevice.h b/lib/Target/R600/AMDILSIDevice.h
new file mode 100644
index 0000000000..5b2cb25022
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.h
@@ -0,0 +1,39 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILSIDEVICE_H
+#define AMDILSIDEVICE_H
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
+/// of cards.
+class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUSIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+};
+
+} // namespace llvm
+#endif // AMDILSIDEVICE_H
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
new file mode 100644
index 0000000000..ce0b56bdf0
--- /dev/null
+++ b/lib/Target/R600/CMakeLists.txt
@@ -0,0 +1,55 @@
+set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
+
+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+add_public_tablegen_target(AMDGPUCommonTableGen)
+
+add_llvm_target(R600CodeGen
+ AMDIL7XXDevice.cpp
+ AMDILCFGStructurizer.cpp
+ AMDILDevice.cpp
+ AMDILDeviceInfo.cpp
+ AMDILEvergreenDevice.cpp
+ AMDILFrameLowering.cpp
+ AMDILIntrinsicInfo.cpp
+ AMDILISelDAGToDAG.cpp
+ AMDILISelLowering.cpp
+ AMDILNIDevice.cpp
+ AMDILPeepholeOptimizer.cpp
+ AMDILSIDevice.cpp
+ AMDGPUAsmPrinter.cpp
+ AMDGPUMCInstLower.cpp
+ AMDGPUSubtarget.cpp
+ AMDGPUStructurizeCFG.cpp
+ AMDGPUTargetMachine.cpp
+ AMDGPUISelLowering.cpp
+ AMDGPUConvertToISA.cpp
+ AMDGPUInstrInfo.cpp
+ AMDGPURegisterInfo.cpp
+ R600ExpandSpecialInstrs.cpp
+ R600InstrInfo.cpp
+ R600ISelLowering.cpp
+ R600MachineFunctionInfo.cpp
+ R600RegisterInfo.cpp
+ SIAnnotateControlFlow.cpp
+ SIAssignInterpRegs.cpp
+ SIInstrInfo.cpp
+ SIISelLowering.cpp
+ SILowerLiteralConstants.cpp
+ SILowerControlFlow.cpp
+ SIMachineFunctionInfo.cpp
+ SIRegisterInfo.cpp
+ )
+
+add_dependencies(LLVMR600CodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 0000000000..e6c550b5ac
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,132 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case AMDGPU::PRED_SEL_OFF: break;
+ default: O << getRegisterName(Op.getReg()); break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ O << Op.getFPImm();
+ } else {
+ assert(!"unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef Asm) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm());
+ if (Op.getImm() == 1) {
+ O << Asm;
+ }
+}
+
+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "|");
+}
+
+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ union Literal {
+ float f;
+ int32_t i;
+ } L;
+
+ L.i = MI->getOperand(OpNo).getImm();
+ O << L.i << "(" << L.f << ")";
+}
+
+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, " *");
+}
+
+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "-");
+}
+
+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default: break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() != 0) {
+ O << " + " << Op.getImm();
+ }
+}
+
+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 0000000000..96e0e46f8a
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,52 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTPRINTER_H
+#define AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ //Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUINSTRPRINTER_H
diff --git a/lib/Target/R600/InstPrinter/CMakeLists.txt b/lib/Target/R600/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000000..069c55ba94
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600AsmPrinter
+ AMDGPUInstPrinter.cpp
+ )
+
+add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/InstPrinter/LLVMBuild.txt b/lib/Target/R600/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000000..ec0be89f10
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600AsmPrinter
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
+
diff --git a/lib/Target/R600/InstPrinter/Makefile b/lib/Target/R600/InstPrinter/Makefile
new file mode 100644
index 0000000000..a794cc1124
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+#===- lib/Target/R600/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt
new file mode 100644
index 0000000000..f2a7554e52
--- /dev/null
+++ b/lib/Target/R600/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = R600
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = R600CodeGen
+parent = R600
+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 0000000000..98fca43267
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ //XXX: Implement if necessary.
+ }
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ assert(!"Not implemented");
+ }
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ virtual unsigned getNumFixupKinds() const { return 0; };
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+ }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(!"Not implemented");
+ }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
+ }
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(I, Layout);
+ }
+}
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new AMDGPUAsmBackend(T);
+}
+
+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
+ raw_ostream &OS) const {
+ return new AMDGPUMCObjectWriter(OS);
+}
+
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ assert(Fixup.getKind() == FK_PCRel_4);
+ *Dst = (Value - 4) / 4;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 0000000000..4d3d3e7945
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,85 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
+ HasSingleParameterDotFile = false;
+ WeakDefDirective = 0;
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = 0;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ LinkOnceDirective = 0;
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::None;
+ DwarfUsesInlineInfoSection = false;
+ DwarfSectionOffsetDirective = ".offset";
+
+}
+
+const char*
+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
+ return 0;
+}
+
+const MCSection*
+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
+ return 0;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 0000000000..3ad0fa6824
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMCASMINFO_H
+#define AMDGPUMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+
+class Target;
+class StringRef;
+
+class AMDGPUMCAsmInfo : public MCAsmInfo {
+public:
+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
+ const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+};
+} // namespace llvm
+#endif // AMDGPUMCASMINFO_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 0000000000..9d0d6cf6fd
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,60 @@
+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCInst;
+class MCOperand;
+
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+
+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 0000000000..072ee49b63
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,113 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ } else {
+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+}
+
+extern "C" void LLVMInitializeR600TargetMC() {
+
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+
+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 0000000000..363a4af3f3
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,55 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDGPUMCTARGETDESC_H
+#define AMDGPUMCTARGETDESC_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDGPUTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif // AMDGPUMCTARGETDESC_H
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000000..37e714c2e7
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+add_llvm_library(LLVMR600Desc
+ AMDGPUAsmBackend.cpp
+ AMDGPUMCTargetDesc.cpp
+ AMDGPUMCAsmInfo.cpp
+ R600MCCodeEmitter.cpp
+ SIMCCodeEmitter.cpp
+ )
+
+add_dependencies(LLVMR600Desc AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000000..b1beab0bb3
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Desc
+parent = R600
+required_libraries = R600AsmPrinter R600Info MC
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/Makefile b/lib/Target/R600/MCTargetDesc/Makefile
new file mode 100644
index 0000000000..8894a7607f
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 0000000000..36deae9c0a
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,574 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This code emitter outputs bytecode that is understood by the r600g driver
+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+/// but it still needs to be run through a finalizer in order to be executed
+/// by the GPU.
+///
+/// [1] http://www.mesa3d.org/
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+private:
+
+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
+ raw_ostream &OS) const;
+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
+ void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
+
+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+ bool isFCOp(unsigned opcode) const;
+ bool isTexOp(unsigned opcode) const;
+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
+
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX,
+ INSTR_EXPORT
+};
+
+enum FCInstr {
+ FC_IF_PREDICATE = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK_PREDICATE,
+ FC_CONTINUE
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (isTexOp(MI.getOpcode())) {
+ EmitTexInstr(MI, Fixups, OS);
+ } else if (isFCOp(MI.getOpcode())){
+ EmitFCInstr(MI, OS);
+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else {
+ switch(MI.getOpcode()) {
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(inst, OS);
+ break;
+ }
+ case AMDGPU::CONSTANT_LOAD_eg:
+ case AMDGPU::VTX_READ_PARAM_8_eg:
+ case AMDGPU::VTX_READ_PARAM_16_eg:
+ case AMDGPU::VTX_READ_PARAM_32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_8_eg:
+ case AMDGPU::VTX_READ_GLOBAL_32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+
+ EmitByte(INSTR_VTX, OS);
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ break;
+ }
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::R600_ExportBuf: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_EXPORT, OS);
+ Emit(Inst, OS);
+ break;
+ }
+
+ default:
+ EmitALUInstr(MI, Fixups, OS);
+ break;
+ }
+ }
+}
+
+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Emit instruction type
+ EmitByte(INSTR_ALU, OS);
+
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+
+ //older alu have different encoding for instructions with one or two src
+ //parameters.
+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
+ InstWord01 &= ~(0x3FFULL << 39);
+ InstWord01 |= ISAOpCode << 1;
+ }
+
+ unsigned SrcIdx = 0;
+ for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
+ if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
+ OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
+ continue;
+ }
+ EmitSrcISA(MI, OpIdx, InstWord01, OS);
+ SrcIdx++;
+ }
+
+ // Emit zeros for unused sources
+ for ( ; SrcIdx < 3; SrcIdx++) {
+ EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
+ }
+
+ Emit(InstWord01, OS);
+ return;
+}
+
+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
+ raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ union {
+ float f;
+ uint32_t i;
+ } Value;
+ Value.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+ // For other potential instruction operands, (e.g. constant registers) the
+ // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ EmitTwoBytes(getHWReg(reg), OS);
+ if (reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ Value.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ Value.i = ImmOp.getImm();
+ }
+ }
+ } else {
+ // XXX: Handle other operand types.
+ EmitTwoBytes(0, OS);
+ }
+
+ // Emit the source channel (1 byte)
+ if (MO.isReg()) {
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit isNegated (1 byte)
+ if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
+ && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+ (MO.isReg() &&
+ (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit isAbsolute (1 byte)
+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit relative addressing mode (1 byte)
+ EmitByte(0, OS);
+
+ // Emit kc_bank, This will be adjusted later by r600_asm
+ EmitByte(0, OS);
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(Value.i, OS);
+
+}
+
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
+ uint64_t &Value, raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ union {
+ float f;
+ uint32_t i;
+ } InlineConstant;
+ InlineConstant.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+ // For other potential instruction operands, (e.g. constant registers) the
+ // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
+ }
+ }
+ }
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(InlineConstant.i, OS);
+}
+
+void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+
+ unsigned Opcode = MI.getOpcode();
+ bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = hasOffsets ? 3 : 0;
+ int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+ unsigned srcSelect[4] = {0, 1, 2, 3};
+
+ // Emit instruction type
+ EmitByte(1, OS);
+
+ // Emit instruction
+ EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
+
+ // Emit resource id
+ EmitByte(Resource, OS);
+
+ // Emit source register
+ EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
+
+ // XXX: Emit src isRelativeAddress
+ EmitByte(0, OS);
+
+ // Emit destination register
+ EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
+
+ // XXX: Emit dst isRealtiveAddress
+ EmitByte(0, OS);
+
+ // XXX: Emit dst select
+ EmitByte(0, OS); // X
+ EmitByte(1, OS); // Y
+ EmitByte(2, OS); // Z
+ EmitByte(3, OS); // W
+
+ // XXX: Emit lod bias
+ EmitByte(0, OS);
+
+ // XXX: Emit coord types
+ unsigned coordType[4] = {1, 1, 1, 1};
+
+ if (TextureType == TEXTURE_RECT
+ || TextureType == TEXTURE_SHADOWRECT) {
+ coordType[ELEMENT_X] = 0;
+ coordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY
+ || TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ coordType[ELEMENT_Y] = 0;
+ } else {
+ coordType[ELEMENT_Z] = 0;
+ srcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY
+ || TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ coordType[ELEMENT_Z] = 0;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ EmitByte(coordType[i], OS);
+ }
+
+ // XXX: Emit offsets
+ if (hasOffsets)
+ for (unsigned i = 2; i < 5; i++)
+ EmitByte(MI.getOperand(i).getImm()<<1, OS);
+ else
+ EmitNullBytes(3, OS);
+
+ // Emit sampler id
+ EmitByte(Sampler, OS);
+
+ // XXX:Emit source select
+ if ((TextureType == TEXTURE_SHADOW1D
+ || TextureType == TEXTURE_SHADOW2D
+ || TextureType == TEXTURE_SHADOWRECT
+ || TextureType == TEXTURE_SHADOW1D_ARRAY)
+ && Opcode != AMDGPU::TEX_SAMPLE_C_L
+ && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ srcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ for (unsigned i = 0; i < 4; i++) {
+ EmitByte(srcSelect[i], OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
+
+ // Emit instruction type
+ EmitByte(INSTR_FC, OS);
+
+ // Emit SRC
+ unsigned NumOperands = MI.getNumOperands();
+ if (NumOperands > 0) {
+ assert(NumOperands == 1);
+ EmitSrc(MI, 0, OS);
+ } else {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ // Emit FC Instruction
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDGPU::PREDICATED_BREAK:
+ instr = FC_BREAK_PREDICATE;
+ break;
+ case AMDGPU::CONTINUE:
+ instr = FC_CONTINUE;
+ break;
+ case AMDGPU::IF_PREDICATE_SET:
+ instr = FC_IF_PREDICATE;
+ break;
+ case AMDGPU::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDGPU::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDGPU::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDGPU::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ EmitByte(instr, OS);
+}
+
+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
+ raw_ostream &OS) const {
+
+ for (unsigned int i = 0; i < ByteCount; i++) {
+ EmitByte(0, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) Byte & 0xff);
+}
+
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
+ raw_ostream &OS) const {
+ OS.write((uint8_t) (Bytes & 0xff));
+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 4; i++) {
+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 8; i++) {
+ EmitByte((Value >> (8 * i)) & 0xff, OS);
+ }
+}
+
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ if (MO.isReg()) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ return MRI.getEncodingValue(MO.getReg());
+ } else {
+ return getHWReg(MO.getReg());
+ }
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else {
+ assert(0);
+ return 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::PREDICATED_BREAK:
+ case AMDGPU::CONTINUE:
+ case AMDGPU::IF_PREDICATE_SET:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
+ unsigned Flag) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
+ if (FlagIndex == 0) {
+ return false;
+ }
+ assert(MI.getOperand(FlagIndex).isImm());
+ return !!((MI.getOperand(FlagIndex).getImm() >>
+ (NUM_MO_FLAGS * Operand)) & Flag);
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 0000000000..b4bdb25289
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,298 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The SI code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
+#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
+
+// These must be kept in sync with SIInstructions.td and also the
+// InstrEncodingInfo array in SIInstrInfo.cpp.
+//
+// NOTE: This enum is only used to identify the encoding type within LLVM,
+// the actual encoding type that is part of the instruction format is different
+namespace SIInstrEncodingType {
+ enum Encoding {
+ EXP = 0,
+ LDS = 1,
+ MIMG = 2,
+ MTBUF = 3,
+ MUBUF = 4,
+ SMRD = 5,
+ SOP1 = 6,
+ SOP2 = 7,
+ SOPC = 8,
+ SOPK = 9,
+ SOPP = 10,
+ VINTRP = 11,
+ VOP1 = 12,
+ VOP2 = 13,
+ VOP3 = 14,
+ VOPC = 15
+ };
+}
+
+using namespace llvm;
+
+namespace {
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ ~SIMCCodeEmitter() { }
+
+ /// \breif Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+public:
+
+ /// \brief Encode a sequence of registers with the correct alignment.
+ unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+ /// \brief Encoding for when 2 consecutive registers are used
+ virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+ /// \brief Encoding for when 4 consectuive registers are used
+ virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+ /// \brief Encoding for SMRD indexed loads
+ virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const;
+
+ /// \brief Post-Encoder method for VOP instructions
+ virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
+
+private:
+
+ /// \returns this SIInstrEncodingType for this instruction.
+ unsigned getEncodingType(const MCInst &MI) const;
+
+ /// \brief Get then size in bytes of this instructions encoding.
+ unsigned getEncodingBytes(const MCInst &MI) const;
+
+ /// \returns the hardware encoding for a register
+ unsigned getRegBinaryCode(unsigned reg) const;
+
+ /// \brief Generated function that returns the hardware encoding for
+ /// a register
+ unsigned getHWRegNum(unsigned reg) const;
+
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+ unsigned bytes = getEncodingBytes(MI);
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return getRegBinaryCode(MO.getReg());
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else if (MO.isFPImm()) {
+ // XXX: Not all instructions can use inline literals
+ // XXX: We should make sure this is a 32-bit constant
+ union {
+ float F;
+ uint32_t I;
+ } Imm;
+ Imm.F = MO.getFPImm();
+ return Imm.I;
+ } else if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ } else{
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Operand Encodings
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
+ unsigned shift) const {
+ unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
+ return regCode >> shift;
+ return 0;
+}
+unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
+ unsigned OpNo ,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ return GPRAlign(MI, OpNo, 1);
+}
+
+unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
+ unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ return GPRAlign(MI, OpNo, 2);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// This function is responsibe for encoding the offset
+/// and the base ptr for SMRD instructions it should return a bit string in
+/// this format:
+///
+/// OFFSET = bits{7-0}
+/// IMM = bits{8}
+/// SBASE = bits{14-9}
+///
+uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ uint32_t Encoding;
+
+ const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+ //XXX: Use this function for SMRD loads with register offsets
+ assert(OffsetOp.isImm());
+
+ Encoding =
+ (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
+ | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
+ | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
+ ;
+
+ return Encoding;
+}
+
+//===----------------------------------------------------------------------===//
+// Post Encoder Callbacks
+//===----------------------------------------------------------------------===//
+
+uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
+ unsigned encodingType = getEncodingType(MI);
+ unsigned numSrcOps;
+ unsigned vgprBitOffset;
+
+ if (encodingType == SIInstrEncodingType::VOP3) {
+ numSrcOps = 3;
+ vgprBitOffset = 32;
+ } else {
+ numSrcOps = 1;
+ vgprBitOffset = 0;
+ }
+
+ // Add one to skip over the destination reg operand.
+ for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+ const MCOperand &MO = MI.getOperand(opIdx);
+ if (MO.isReg()) {
+ unsigned reg = MI.getOperand(opIdx).getReg();
+ if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
+ AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
+ Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+ }
+ } else if (MO.isFPImm()) {
+ union {
+ float f;
+ uint32_t i;
+ } Imm;
+ // XXX: Not all instructions can use inline literals
+ // XXX: We should make sure this is a 32-bit constant
+ Imm.f = MO.getFPImm();
+ Value |= ((uint64_t)Imm.i) << 32;
+ }
+ }
+ return Value;
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
+ return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
+}
+
+unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
+
+ // These instructions aren't real instructions with an encoding type, so
+ // we need to manually specify their size.
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_LOAD_LITERAL_I32:
+ case AMDGPU::SI_LOAD_LITERAL_F32:
+ return 4;
+ }
+
+ unsigned encoding_type = getEncodingType(MI);
+ switch (encoding_type) {
+ case SIInstrEncodingType::EXP:
+ case SIInstrEncodingType::LDS:
+ case SIInstrEncodingType::MUBUF:
+ case SIInstrEncodingType::MTBUF:
+ case SIInstrEncodingType::MIMG:
+ case SIInstrEncodingType::VOP3:
+ return 8;
+ default:
+ return 4;
+ }
+}
+
+
+unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
+ switch (reg) {
+ case AMDGPU::M0: return 124;
+ case AMDGPU::SREG_LIT_0: return 128;
+ case AMDGPU::SI_LITERAL_CONSTANT: return 255;
+ default: return MRI.getEncodingValue(reg);
+ }
+}
+
diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile
new file mode 100644
index 0000000000..1b3ebbe8c8
--- /dev/null
+++ b/lib/Target/R600/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMR600CodeGen
+TARGET = AMDGPU
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
+ AMDGPUGenAsmWriter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
new file mode 100644
index 0000000000..3dc1ecda77
--- /dev/null
+++ b/lib/Target/R600/Processors.td
@@ -0,0 +1,29 @@
+//===-- Processors.td - TODO: Add brief description -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDIL processors supported.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"rv710", R600_EG_Itin, []>;
+def : Proc<"rv730", R600_EG_Itin, []>;
+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
+
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
new file mode 100644
index 0000000000..7dea8e44ea
--- /dev/null
+++ b/lib/Target/R600/R600Defines.h
@@ -0,0 +1,79 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600DEFINES_H_
+#define R600DEFINES_H_
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define MO_FLAG_LAST (1 << 6)
+#define NUM_MO_FLAGS 7
+
+/// \brief Helper for getting the operand index for the instruction flags
+/// operand.
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6),
+ //FlagOperand bits 7, 8
+ NATIVE_OPERANDS = (1 << 9),
+ OP1 = (1 << 10),
+ OP2 = (1 << 11)
+ };
+}
+
+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
+
+/// \brief Defines for extracting register infomation from register encoding
+#define HW_REG_MASK 0x1ff
+#define HW_CHAN_SHIFT 9
+
+namespace R600Operands {
+ enum Ops {
+ DST,
+ UPDATE_EXEC_MASK,
+ UPDATE_PREDICATE,
+ WRITE,
+ OMOD,
+ DST_REL,
+ CLAMP,
+ SRC0,
+ SRC0_NEG,
+ SRC0_REL,
+ SRC0_ABS,
+ SRC1,
+ SRC1_NEG,
+ SRC1_REL,
+ SRC1_ABS,
+ SRC2,
+ SRC2_NEG,
+ SRC2_REL,
+ LAST,
+ PRED_SEL,
+ IMM,
+ COUNT
+ };
+}
+
+#endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 0000000000..b903d4aedd
--- /dev/null
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,334 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
+/// group to work correctly. This pass expands these individual instructions
+/// into several instructions that will completely fill the instruction group.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ bool ExpandInputPerspective(MachineInstr& MI);
+ bool ExpandInputConstant(MachineInstr& MI);
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ if (MI.getOpcode() != AMDGPU::input_perspective)
+ return false;
+
+ MachineBasicBlock::iterator I = &MI;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
+ ->getInfo<R600MachineFunctionInfo>();
+ unsigned IJIndexBase;
+
+ // In Evergreen ISA doc section 8.3.2 :
+ // We need to interpolate XY and ZW in two different instruction groups.
+ // An INTERP_* must occupy all 4 slots of an instruction group.
+ // Output of INTERP_XY is written in X,Y slots
+ // Output of INTERP_ZW is written in Z,W slots
+ //
+ // Thus interpolation requires the following sequences :
+ //
+ // AnyGPR.x = INTERP_ZW; (Write Masked Out)
+ // AnyGPR.y = INTERP_ZW; (Write Masked Out)
+ // DstGPR.z = INTERP_ZW;
+ // DstGPR.w = INTERP_ZW; (End of first IG)
+ // DstGPR.x = INTERP_XY;
+ // DstGPR.y = INTERP_XY;
+ // AnyGPR.z = INTERP_XY; (Write Masked Out)
+ // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
+ //
+ switch (MI.getOperand(1).getImm()) {
+ case 0:
+ IJIndexBase = MFI->GetIJPerspectiveIndex();
+ break;
+ case 1:
+ IJIndexBase = MFI->GetIJLinearIndex();
+ break;
+ default:
+ assert(0 && "Unknow ij index");
+ }
+
+ for (unsigned i = 0; i < 8; i++) {
+ unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
+ 2 * IJIndexBase + ((i + 1) % 2));
+ unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+
+ unsigned Sel = AMDGPU::sel_x;
+ switch (i % 4) {
+ case 0:Sel = AMDGPU::sel_x;break;
+ case 1:Sel = AMDGPU::sel_y;break;
+ case 2:Sel = AMDGPU::sel_z;break;
+ case 3:Sel = AMDGPU::sel_w;break;
+ default:break;
+ }
+
+ unsigned Res = TRI.getSubReg(DstReg, Sel);
+
+ unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
+
+ MachineBasicBlock &MBB = *(MI.getParent());
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
+
+ if (!(i> 1 && i < 6)) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+
+ if (i % 4 != 3)
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ if (MI.getOpcode() != AMDGPU::input_constant)
+ return false;
+
+ MachineBasicBlock::iterator I = &MI;
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned i = 0; i < 4; i++) {
+ unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+
+ unsigned Sel = AMDGPU::sel_x;
+ switch (i % 4) {
+ case 0:Sel = AMDGPU::sel_x;break;
+ case 1:Sel = AMDGPU::sel_y;break;
+ case 2:Sel = AMDGPU::sel_z;break;
+ case 3:Sel = AMDGPU::sel_w;break;
+ default:break;
+ }
+
+ unsigned Res = TRI.getSubReg(DstReg, Sel);
+
+ MachineBasicBlock &MBB = *(MI.getParent());
+ MachineInstr *NewMI = TII->buildDefaultInstruction(
+ MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
+
+ if (i % 4 != 3)
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+
+ return true;
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+
+ switch (MI.getOpcode()) {
+ default: break;
+ // Expand PRED_X to one of the PRED_SET instructions.
+ case AMDGPU::PRED_X: {
+ uint64_t Flags = MI.getOperand(3).getImm();
+ // The native opcode used by PRED_X is stored as an immediate in the
+ // third operand.
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ MI.getOperand(2).getImm(), // opcode
+ MI.getOperand(0).getReg(), // dst
+ MI.getOperand(1).getReg(), // src0
+ AMDGPU::ZERO); // src1
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ if (Flags & MO_FLAG_PUSH) {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+ } else {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+ case AMDGPU::BREAK:
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ AMDGPU::PRED_SETE_INT,
+ AMDGPU::PREDICATE_BIT,
+ AMDGPU::ZERO,
+ AMDGPU::ZERO);
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::PREDICATED_BREAK))
+ .addReg(AMDGPU::PREDICATE_BIT);
+ MI.eraseFromParent();
+ continue;
+ }
+
+ if (ExpandInputPerspective(MI))
+ continue;
+ if (ExpandInputConstant(MI))
+ continue;
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // TO_X = DP4 T1_X, T2_X
+ // TO_Y (write masked) = DP4 T1_Y, T2_Y
+ // TO_Z (write masked) = DP4 T1_Z, T2_Z
+ // TO_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // TO_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::DST)).getReg();
+ unsigned Src0 = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
+ if (Src1Idx != -1) {
+ Src1 = MI.getOperand(Src1Idx).getReg();
+ }
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+ // Determine the correct destination registers;
+ bool Mask = false;
+ bool NotLast = true;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+ NotLast = (Chan != 3 );
+
+ // Add the new instruction
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ case AMDGPU::DOT4_r600_pseudo:
+ Opcode = AMDGPU::DOT4_r600_real;
+ break;
+ case AMDGPU::DOT4_eg_pseudo:
+ Opcode = AMDGPU::DOT4_eg_real;
+ break;
+ default:
+ break;
+ }
+
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
+
+ if (Chan != 0)
+ NewMI->bundleWithPred();
+ if (Mask) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+ if (NotLast) {
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
new file mode 100644
index 0000000000..f0eece39ea
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -0,0 +1,909 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for R600
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ computeRegisterProperties();
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Expand);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::v4i32, Expand);
+ setOperationAction(ISD::AND, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
+ setOperationAction(ISD::UREM, MVT::v4i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
+ setOperationAction(ISD::FPOW, MVT::f32, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setTargetDAGCombine(ISD::FP_ROUND);
+
+ setSchedulingPreference(Sched::VLIW);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::SHADER_TYPE: break;
+ case AMDGPU::CLAMP_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+
+ case AMDGPU::FABS_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::R600_LOAD_CONST: {
+ int64_t RegIndex = MI->getOperand(1).getImm();
+ unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
+ .addOperand(MI->getOperand(0))
+ .addReg(ConstantReg);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE: {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDGPU::MOV_IMM_F32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getFPImm()->getValueAPF()
+ .bitcastToAPInt().getZExtValue());
+ break;
+ case AMDGPU::MOV_IMM_I32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+ break;
+
+
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::RESERVE_REG: {
+ R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+ int64_t ReservedIndex = MI->getOperand(0).getImm();
+ unsigned ReservedReg =
+ AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
+ MFI->ReservedRegs.push_back(ReservedReg);
+ unsigned SuperReg =
+ AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
+ MFI->ReservedRegs.push_back(SuperReg);
+ break;
+ }
+
+ case AMDGPU::TXD: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::TXD_SHADOW: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(0);
+ break;
+
+ case AMDGPU::BRANCH_COND_f32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::BRANCH_COND_i32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::input_perspective: {
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+
+ // XXX Be more fine about register reservation
+ for (unsigned i = 0; i < 4; i ++) {
+ unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
+ MFI->ReservedRegs.push_back(ReservedReg);
+ }
+
+ switch (MI->getOperand(1).getImm()) {
+ case 0:// Perspective
+ MFI->HasPerspectiveInterpolation = true;
+ break;
+ case 1:// Linear
+ MFI->HasLinearInterpolation = true;
+ break;
+ default:
+ assert(0 && "Unknow ij index");
+ }
+
+ return BB;
+ }
+
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz: {
+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
+ if (!EOP)
+ return BB;
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(1);
+ break;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+using namespace llvm::Intrinsic;
+using namespace llvm::AMDGPUIntrinsic;
+
+static SDValue
+InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
+ unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
+ SDValue Scalar, SDValue Chain) {
+ if (!ExportMap[Slot]) {
+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ DL, MVT::v4f32,
+ DAG.getUNDEF(MVT::v4f32),
+ Scalar,
+ DAG.getConstant(Channel, MVT::i32));
+
+ unsigned Mask = 1 << Channel;
+
+ const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
+ DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32)};
+
+ SDValue Res = DAG.getNode(
+ AMDGPUISD::EXPORT,
+ DL,
+ MVT::Other,
+ Ops, 6);
+ ExportMap[Slot] = Res.getNode();
+ return Res;
+ }
+
+ SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
+ SDValue PreviousVector = ExportInstruction->getOperand(1);
+ SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
+ DL, MVT::v4f32,
+ PreviousVector,
+ Scalar,
+ DAG.getConstant(Channel, MVT::i32));
+
+ unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
+ ->getZExtValue();
+ Mask |= (1 << Channel);
+
+ const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
+ DAG.getConstant(Inst, MVT::i32),
+ DAG.getConstant(Type, MVT::i32),
+ DAG.getConstant(Slot, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32)};
+
+ DAG.UpdateNodeOperands(ExportInstruction,
+ Ops, 6);
+
+ return Chain;
+
+}
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::FPOW: return LowerFPOW(Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ if (!MRI.isLiveOut(Reg)) {
+ MRI.addLiveOut(Reg);
+ }
+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ }
+ case AMDGPUIntrinsic::R600_store_pixel_color: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+
+ SDNode **OutputsMap = MFI->Outputs;
+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+ RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
+ Chain);
+
+ }
+ case AMDGPUIntrinsic::R600_store_stream_output : {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
+ unsigned Inst;
+ switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) {
+ // STREAM3
+ case 3:
+ Inst = 4;
+ break;
+ // STREAM2
+ case 2:
+ Inst = 3;
+ break;
+ // STREAM1
+ case 1:
+ Inst = 2;
+ break;
+ // STREAM0
+ case 0:
+ Inst = 1;
+ break;
+ default:
+ llvm_unreachable("Wrong buffer id for stream outputs !");
+ }
+
+ return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
+ RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
+ Chain);
+ }
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ }
+ case AMDGPUIntrinsic::R600_load_input_perspective: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (slot < 0)
+ return DAG.getUNDEF(MVT::f32);
+ SDValue FullVector = DAG.getNode(
+ AMDGPUISD::INTERP,
+ DL, MVT::v4f32,
+ DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+ }
+ case AMDGPUIntrinsic::R600_load_input_linear: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (slot < 0)
+ return DAG.getUNDEF(MVT::f32);
+ SDValue FullVector = DAG.getNode(
+ AMDGPUISD::INTERP,
+ DL, MVT::v4f32,
+ DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+ }
+ case AMDGPUIntrinsic::R600_load_input_constant: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (slot < 0)
+ return DAG.getUNDEF(MVT::f32);
+ SDValue FullVector = DAG.getNode(
+ AMDGPUISD::INTERP_P0,
+ DL, MVT::v4f32,
+ DAG.getConstant(slot / 4 , MVT::i32));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+ }
+
+ case r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: return;
+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ }
+}
+
+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(
+ ISD::SETCC,
+ Op.getDebugLoc(),
+ MVT::i1,
+ Op, DAG.getConstantFP(0.0f, MVT::f32),
+ DAG.getCondCode(ISD::SETNE)
+ );
+}
+
+SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue CC = Op.getOperand(1);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ SDValue Result;
+
+ if (LHS.getValueType() == MVT::i32) {
+ CmpValue = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::i32,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ } else if (LHS.getValueType() == MVT::f32) {
+ CmpValue = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::f32,
+ LHS, RHS,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ DAG.getConstantFP(0.0f, MVT::f32),
+ CC);
+ } else {
+ assert(0 && "Not valid type for br_cc");
+ }
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL,
+ unsigned DwordOffset) const {
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+
+ // We shouldn't be using an offset wider than 16-bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+ Op.getOperand(0),
+ Op.getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(32, MVT::i32),
+ Op.getOperand(1)));
+}
+
+bool R600TargetLowering::isZero(SDValue Op) const {
+ if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ return Cst->isNullValue();
+ } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
+ return CstFP->isZero();
+ } else {
+ return false;
+ }
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // Check if we can lower this to a native operation.
+
+ // Try to lower to a CND* instruction:
+ // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
+ // can be lowered to CND* instructions can also be lowered to SET*
+ // instructions. CND* instructions are cheaper, because they dont't
+ // require additional instructions to convert their result to the correct
+ // value type, so this check should be first.
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ if (CompareVT != VT) {
+ // Bitcast True / False to the correct types. This will end up being
+ // a nop, but it allows us to define only a single pattern in the
+ // .TD files for each CND* instruction rather than having to have
+ // one pattern for integer True/False and one for fp True/False
+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+ }
+ if (isZero(LHS)) {
+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
+ }
+
+ switch (CCOpcode) {
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT:
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ Temp = True;
+ True = False;
+ False = Temp;
+ break;
+ default:
+ break;
+ }
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ Cond, Zero,
+ True, False,
+ DAG.getCondCode(CCOpcode));
+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
+ }
+
+ // Try to lower to a SET* instruction:
+ // We need all the operands of SELECT_CC to have the same value type, so if
+ // necessary we need to change True and False to be the same type as LHS and
+ // RHS, and then convert the result of the select_cc back to the correct type.
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False)) {
+ if (CompareVT != VT) {
+ if (VT == MVT::f32 && CompareVT == MVT::i32) {
+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ // Convert integer values of true (-1) and false (0) to fp values of
+ // true (1.0f) and false (0.0f).
+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
+ } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
+ SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ LHS, RHS,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ DAG.getConstantFP(0.0f, MVT::f32),
+ CC);
+ // Convert fp values of true (1.0f) and false (0.0f) to integer values
+ // of true (-1) and false (0).
+ SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
+ return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
+ } else {
+ // I don't think there will be any other type pairings.
+ assert(!"Unhandled operand type parings in SELECT_CC");
+ }
+ } else {
+ // This SELECT_CC is already legal.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+ }
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ // If we make it this for it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (CompareVT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ } else if (CompareVT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, CompareVT);
+ HWFalse = DAG.getConstant(0, CompareVT);
+ }
+ else {
+ assert(!"Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
+ Cond, HWFalse,
+ True, False,
+ DAG.getCondCode(ISD::SETNE));
+}
+
+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getCondCode(ISD::SETNE));
+}
+
+SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ assert(Op.getValueType() == MVT::i32);
+ if (LHS.getValueType() == MVT::i32) {
+ Cond = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::i32,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ } else if (LHS.getValueType() == MVT::f32) {
+ Cond = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ MVT::f32,
+ LHS, RHS,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ DAG.getConstantFP(0.0f, MVT::f32),
+ CC);
+ Cond = DAG.getNode(
+ ISD::FP_TO_SINT,
+ DL,
+ MVT::i32,
+ Cond);
+ } else {
+ assert(0 && "Not valid type for set_cc");
+ }
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ MVT::i32,
+ DAG.getConstant(1, MVT::i32),
+ Cond);
+ return Cond;
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+ return SDValue();
+}
+
+
+SDValue R600TargetLowering::LowerFPOW(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
+ SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
+ return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
+}
+
+/// XXX Only kernel functions are supported, so we can assume for now that
+/// every function is a kernel function, but in the future we should use
+/// separate calling conventions for kernel and non-kernel functions.
+SDValue R600TargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ unsigned ParamOffsetBytes = 36;
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
+ EVT VT = Ins[i].VT;
+ Type *ArgType = FuncArg->getType();
+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
+ 32 : ArgType->getPrimitiveSizeInBits();
+ unsigned ArgBytes = ArgSizeInBits >> 3;
+ EVT ArgVT;
+ if (ArgSizeInBits < VT.getSizeInBits()) {
+ assert(!ArgType->isFloatTy() &&
+ "Extending floating point arguments not supported yet");
+ ArgVT = MVT::getIntegerVT(ArgSizeInBits);
+ } else {
+ ArgVT = VT;
+ }
+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
+ DAG.getConstant(ParamOffsetBytes, MVT::i32),
+ MachinePointerInfo(new Argument(PtrTy)),
+ ArgVT, false, false, ArgBytes);
+ InVals.push_back(Arg);
+ ParamOffsetBytes += ArgBytes;
+ }
+ return Chain;
+}
+
+EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
+ case ISD::FP_ROUND: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ Arg.getOperand(0));
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
new file mode 100644
index 0000000000..2b954dab55
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -0,0 +1,72 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering {
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+private:
+ const R600InstrInfo * TII;
+
+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
+ /// lowered to load instructions which retreive the values from the Vertex
+ /// Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ /// \brief Lower ROTL opcode to BITALIGN
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isZero(SDValue Op) const;
+};
+
+} // End namespace llvm;
+
+#endif // R600ISELLOWERING_H
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
new file mode 100644
index 0000000000..06b78d09cc
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -0,0 +1,666 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned I = 0; I < 4; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ RI.getSubReg(DestReg, SubRegIndex),
+ RI.getSubReg(SrcReg, SubRegIndex))
+ .addReg(DestReg,
+ RegState::Define | RegState::Implicit);
+ }
+ } else {
+
+ // We can't copy vec4 registers
+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
+
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ DestReg, SrcReg);
+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
+ .setIsKill(KillSrc);
+ }
+}
+
+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
+ unsigned DstReg, int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addReg(AMDGPU::ALU_LITERAL_X);
+ MIB.addImm(Imm);
+ MIB.addReg(0); // PREDICATE_BIT
+
+ return MI;
+}
+
+unsigned R600InstrInfo::getIEQOpcode() const {
+ return AMDGPU::SETE_INT;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const {
+
+
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as place holders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ case AMDGPU::RESERVE_REG:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT4_eg_pseudo:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
+ if (LastOpc == AMDGPU::JUMP) {
+ if(!isPredicated(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP &&
+ isPredicated(SecondLastInst) &&
+ LastOpc == AMDGPU::JUMP &&
+ !isPredicated(LastInst)) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate !");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+
+ // Note : we leave PRED* instructions there.
+ // They may be needed when predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP:
+ if (isPredicated(I)) {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ }
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const {
+ // XXX: KILL* instructions can be predicated, but they must be the last
+ // instruction in a clause, so this means any instructions after them cannot
+ // be predicated. Until we have proper support for instruction clauses in the
+ // backend, we will mark KILL* instructions as unpredicable.
+
+ if (MI->getOpcode() == AMDGPU::KILLGT) {
+ return false;
+ } else {
+ return AMDGPUInstrInfo::isPredicable(MI);
+ }
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const{
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ const BranchProbability &Probability)
+ const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg) const {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
+ DstReg); // $dst
+
+ if (Src1Reg) {
+ MIB.addImm(0) // $update_exec_mask
+ .addImm(0); // $update_predicate
+ }
+ MIB.addImm(1) // $write
+ .addImm(0) // $omod
+ .addImm(0) // $dst_rel
+ .addImm(0) // $dst_clamp
+ .addReg(Src0Reg) // $src0
+ .addImm(0) // $src0_neg
+ .addImm(0) // $src0_rel
+ .addImm(0); // $src0_abs
+
+ if (Src1Reg) {
+ MIB.addReg(Src1Reg) // $src1
+ .addImm(0) // $src1_neg
+ .addImm(0) // $src1_rel
+ .addImm(0); // $src1_abs
+ }
+
+ //XXX: The r600g finalizer expects this to be 1, once we've moved the
+ //scheduling to the backend, we can change the default to 0.
+ MIB.addImm(1) // $last
+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addImm(0); // $literal
+
+ return MIB;
+}
+
+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const {
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
+ AMDGPU::ALU_LITERAL_X);
+ setImmOperand(MovImm, R600Operands::IMM, Imm);
+ return MovImm;
+}
+
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::Ops Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::Ops Op) const {
+ const static int OpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S
+// R O D L S R R R S R R R S R R L P
+// D U I M R A R C C C C C C C R C C A R I
+// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
+// T M P E D L P 0 N R A 1 N R A 2 N R T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
+ };
+ unsigned TargetFlags = get(Opcode).TSFlags;
+ unsigned OpTableIdx;
+
+ if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
+ switch (Op) {
+ case R600Operands::DST: return 0;
+ case R600Operands::SRC0: return 1;
+ case R600Operands::SRC1: return 2;
+ case R600Operands::SRC2: return 3;
+ default:
+ assert(!"Unknown operand type for instruction");
+ return -1;
+ }
+ }
+
+ if (TargetFlags & R600_InstFlag::OP1) {
+ OpTableIdx = 0;
+ } else if (TargetFlags & R600_InstFlag::OP2) {
+ OpTableIdx = 1;
+ } else {
+ assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
+ "for this instruction");
+ OpTableIdx = 2;
+ }
+
+ return OpTable[OpTableIdx][Op];
+}
+
+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
+ int64_t Imm) const {
+ int Idx = getOperandIdx(*MI, Op);
+ assert(Idx != -1 && "Operand not supported for this instruction.");
+ assert(MI->getOperand(Idx).isImm());
+ MI->getOperand(Idx).setImm(Imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ int FlagIndex = 0;
+ if (Flag != 0) {
+ // If we pass something other than the default value of Flag to this
+ // function, it means we are want to set a flag on an instruction
+ // that uses native encoding.
+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
+ switch (Flag) {
+ case MO_FLAG_CLAMP:
+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
+ break;
+ case MO_FLAG_MASK:
+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
+ break;
+ case MO_FLAG_NOT_LAST:
+ case MO_FLAG_LAST:
+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
+ break;
+ case MO_FLAG_NEG:
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
+ }
+ break;
+
+ case MO_FLAG_ABS:
+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
+ "instructions.");
+ (void)IsOP3;
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
+ }
+ break;
+
+ default:
+ FlagIndex = -1;
+ break;
+ }
+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
+ } else {
+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ }
+
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (Flag == 0) {
+ return;
+ }
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ if (Flag == MO_FLAG_NOT_LAST) {
+ clearFlag(MI, Operand, MO_FLAG_LAST);
+ } else if (Flag == MO_FLAG_MASK) {
+ clearFlag(MI, Operand, Flag);
+ } else {
+ FlagOp.setImm(1);
+ }
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+ }
+}
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ FlagOp.setImm(0);
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+ }
+}
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
new file mode 100644
index 0000000000..11685af5b0
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -0,0 +1,168 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ int getBranchInstr(const MachineOperand &op) const;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
+ /// \breif Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const;
+ virtual bool isMov(unsigned Opcode) const;
+
+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isPredicable(MachineInstr *MI) const;
+
+ bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ const BranchProbability &Probability) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const ;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const { return 1;}
+
+ /// You can use this function to avoid manually specifying each instruction
+ /// modifier operand when building a new instruction.
+ ///
+ /// \returns a MachineInstr with all the instruction modifiers initialized
+ /// to their default values.
+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg = 0) const;
+
+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
+
+ /// \brief Get the index of Op in the MachineInstr.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+
+ /// \brief Get the index of \p Op for the given Opcode.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+
+ /// \brief Helper function for setting instruction flag values.
+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
+
+ /// \returns true if this instruction has an operand for storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+ ///\brief Add one of the MO_FLAG* flags to the specified \p Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+ ///\brief Determine if the specified \p Flag is set on this \p Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+ /// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2)
+ /// \param Flag The flag being set.
+ ///
+ /// \returns the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
+ unsigned Flag = 0) const;
+
+ /// \brief Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+} // End llvm namespace
+
+#endif // R600INSTRINFO_H_
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
new file mode 100644
index 0000000000..64bab18fa6
--- /dev/null
+++ b/lib/Target/R600/R600Instructions.td
@@ -0,0 +1,1724 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Tablegen instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+
+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit HasNativeOperands = 0;
+
+ bits<11> op_code = inst;
+ //let Inst = inst;
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+}
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst <outs, ins, asm, pattern> {
+ field bits<64> Inst;
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+ let PrintMethod = "printMemOperand";
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+// Operands for non-registers
+
+class InstFlag<string PM = "printOperand", int Default = 0>
+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
+ let PrintMethod = PM;
+}
+
+def LITERAL : InstFlag<"printLiteral">;
+
+def WRITE : InstFlag <"printWrite", 1>;
+def OMOD : InstFlag <"printOMOD">;
+def REL : InstFlag <"printRel">;
+def CLAMP : InstFlag <"printClamp">;
+def NEG : InstFlag <"printNeg">;
+def ABS : InstFlag <"printAbs">;
+def UEM : InstFlag <"printUpdateExecMask">;
+def UP : InstFlag <"printUpdatePred">;
+
+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
+// Once we start using the packetizer in this backend we should have this
+// default to 0.
+def LAST : InstFlag<"printLast", 1>;
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+
+class R600ALU_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{12} = src0_neg;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{25} = src1_neg;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle = 0;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+// Class for instructions with only one source register.
+// If you add new ins to this instruction, make sure they are listed before
+// $literal, because the backend currently assumes that the last operand is
+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
+// and R600InstrInfo::getOperandIdx().
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(opName,
+ "$clamp $dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+ let HasNativeOperands = 1;
+ let Op1 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+>;
+
+// If you add our change the operands for R600_2OP instructions, you must
+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(opName,
+ "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let HasNativeOperands = 1;
+ let Op2 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itim = AnyALU> :
+ R600_2OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
+ R600_Reg32:$src1))]
+>;
+
+// If you add our change the operands for R600_3OP instructions, you must
+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and
+// R600InstrInfo::getOperandIdx().
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(opName, "$clamp $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP3<inst>{
+
+ let HasNativeOperands = 1;
+ let DisableEncoding = "$literal";
+ let Op3 = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin>;
+
+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
+ pattern,
+ itin>{
+ let Inst {10-0} = inst;
+ }
+
+} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+ dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern> {
+ bits<7> RW_GPR;
+ bits<7> INDEX_GPR;
+
+ bits<2> RIM;
+ bits<2> TYPE;
+ bits<1> RW_REL;
+ bits<2> ELEM_SIZE;
+
+ bits<12> ARRAY_SIZE;
+ bits<4> COMP_MASK;
+ bits<4> BURST_COUNT;
+ bits<1> VPM;
+ bits<1> eop;
+ bits<1> MARK;
+ bits<1> BARRIER;
+
+ // CF_ALLOC_EXPORT_WORD0_RAT
+ let Inst{3-0} = rat_id;
+ let Inst{9-4} = rat_inst;
+ let Inst{10} = 0; // Reserved
+ let Inst{12-11} = RIM;
+ let Inst{14-13} = TYPE;
+ let Inst{21-15} = RW_GPR;
+ let Inst{22} = RW_REL;
+ let Inst{29-23} = INDEX_GPR;
+ let Inst{31-30} = ELEM_SIZE;
+
+ // CF_ALLOC_EXPORT_WORD1_BUF
+ let Inst{43-32} = ARRAY_SIZE;
+ let Inst{47-44} = COMP_MASK;
+ let Inst{51-48} = BURST_COUNT;
+ let Inst{52} = VPM;
+ let Inst{53} = eop;
+ let Inst{61-54} = cf_inst;
+ let Inst{62} = MARK;
+ let Inst{63} = BARRIER;
+}
+
+class LoadParamFrag <PatFrag load_type> : PatFrag <
+ (ops node:$ptr), (load_type node:$ptr),
+ [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
+>;
+
+def load_param : LoadParamFrag<load>;
+def load_param_zexti8 : LoadParamFrag<zextloadi8>;
+def load_param_zexti16 : LoadParamFrag<zextloadi16>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
+def isR700 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->getDeviceFlag()"
+ ">= OCL_DEVICE_RV710">;
+def isEG : Predicate<
+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
+
+def isCayman : Predicate<"Subtarget.device()"
+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
+ "|| Subtarget.device()->getGeneration() =="
+ "AMDGPUDeviceInfo::HD6XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP: SDNode<"AMDGPUISD::INTERP",
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
+ >;
+
+def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
+ >;
+
+let usesCustomInserter = 1 in {
+def input_perspective : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0, i32imm:$src1),
+ "input_perspective $src0 $src1 : dst",
+ [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
+} // End usesCustomInserter = 1
+
+def input_constant : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src),
+ "input_perspective $src : dst",
+ [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
+
+
+
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class ExportWord0 {
+ field bits<32> Word0;
+
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> gpr;
+ bits<2> elem_size;
+
+ let Word0{12-0} = arraybase;
+ let Word0{14-13} = type;
+ let Word0{21-15} = gpr;
+ let Word0{22} = 0; // RW_REL
+ let Word0{29-23} = 0; // INDEX_GPR
+ let Word0{31-30} = elem_size;
+}
+
+class ExportSwzWord1 {
+ field bits<32> Word1;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{2-0} = sw_x;
+ let Word1{5-3} = sw_y;
+ let Word1{8-6} = sw_z;
+ let Word1{11-9} = sw_w;
+}
+
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_dummy),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 0, 1, 2, 3, cf_inst, 0)
+ >;
+}
+
+multiclass SteamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4),
+ (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
+let isTerminator = 1, usesCustomInserter = 1 in {
+
+class ExportSwzInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportSwzWord1 {
+ let elem_size = 3;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+} // End isTerminator = 1, usesCustomInserter = 1
+
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_EQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_NE))]
+>;
+
+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
+
+def MOV : R600_1OP <0x19, "MOV", []>;
+
+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
+
+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+ (outs R600_Reg32:$dst),
+ (ins immType:$imm),
+ "",
+ []
+>;
+
+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 fpimm:$val)
+>;
+
+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
+
+let hasSideEffects = 1 in {
+
+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
+
+} // end hasSideEffects
+
+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+>;
+
+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>;
+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+ 0x1E, "CNDGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+ 0x1D, "CNDGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+> {
+let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
+let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
+}
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_SET_GRADIENTS_H : R600_TEX <
+ 0x0B, "TEX_SET_GRADIENTS_H",
+ []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+ 0x0C, "TEX_SET_GRADIENTS_V",
+ []
+>;
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ []
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ []
+>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD",
+ [(set (f32 R600_Reg32:$dst),
+ (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+>;
+
+class CNDE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+class CNDGT_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+class CNDGE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+multiclass DOT4_Common <bits<11> inst> {
+
+ def _pseudo : R600_REDUCTION <inst,
+ (ins R600_Reg128:$src0, R600_Reg128:$src1),
+ "DOT4 $dst $src0, $src1",
+ [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ >;
+
+ def _real : R600_2OP <inst, "DOT4", []>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+multiclass CUBE_Common <bits<11> inst> {
+
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ > {
+ let isPseudo = 1;
+ }
+
+ def _real : R600_2OP <inst, "CUBE", []>;
+}
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "EXP_IEEE", fexp2
+>;
+
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_INT", fp_to_sint
+>;
+
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "INT_TO_FLT", sint_to_fp
+>;
+
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_UINT", fp_to_uint
+>;
+
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "UINT_TO_FLT", uint_to_fp
+>;
+
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "LOG_CLAMPED", []
+>;
+
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "LOG_IEEE", flog2
+>;
+
+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT", mulhs
+>;
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI", mulhu
+>;
+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULLO_INT", mul
+>;
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED", []
+>;
+
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
+>;
+
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIP_UINT", AMDGPUurecip
+>;
+
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+>;
+
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIPSQRT_IEEE", []
+>;
+
+class SIN_Common <bits<11> inst> : R600_1OP <
+ inst, "SIN", []>{
+ let Trig = 1;
+}
+
+class COS_Common <bits<11> inst> : R600_1OP <
+ inst, "COS", []> {
+ let Trig = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
+multiclass DIV_Common <InstR600 recip_ieee> {
+def : Pat<
+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+
+def : Pat<
+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+}
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ defm DOT4_r600 : DOT4_Common<0x50>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+ def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+
+ def R600_ExportSwz : ExportSwzInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+}
+
+// Helper pattern for normalizing inputs to triginomic instructions for R700+
+// cards.
+class COS_PAT <InstR600 trig> : Pat<
+ (fcos R600_Reg32:$src),
+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+class SIN_PAT <InstR600 trig> : Pat<
+ (fsin R600_Reg32:$src),
+ (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+//===----------------------------------------------------------------------===//
+// R700 Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+
+ // R700 normalizes inputs to SIN/COS the same as EG
+ def : SIN_PAT <SIN_r700>;
+ def : COS_PAT <COS_r700>;
+}
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : SIN_PAT <SIN_eg>;
+def : COS_PAT <COS_eg>;
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ defm DOT4_eg : DOT4_Common<0xBE>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
+
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ }
+
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+ }
+
+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+ // TRUNC is used for the FLT_TO_INT instructions to work around a
+ // perceived problem where the rounding modes are applied differently
+ // depending on the instruction and the slot they are in.
+ // See:
+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232
+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
+ //
+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
+ // which do not need to be truncated since the fp values are 0.0f or 1.0f.
+ // We should look into handling these cases separately.
+ def : Pat<(fp_to_sint R600_Reg32:$src0),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def : Pat<(fp_to_uint R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def EG_ExportSwz : ExportSwzInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<EG_ExportSwz, 83>;
+
+ def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
+ let RIM = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let TYPE = 1;
+ let RW_REL = 0;
+ let ELEM_SIZE = 0;
+
+ let ARRAY_SIZE = 0;
+ let COMP_MASK = comp_mask;
+ let BURST_COUNT = 0;
+ let VPM = 0;
+ let MARK = 0;
+ let BARRIER = 1;
+}
+
+} // End usesCustomInserter = 1
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0x1, "RAT_WRITE_CACHELESS_32_eg",
+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+//128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0xf, "RAT_WRITE_CACHELESS_128",
+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> {
+
+ // Operands
+ bits<7> DST_GPR;
+ bits<7> SRC_GPR;
+
+ // Static fields
+ bits<5> VC_INST = 0;
+ bits<2> FETCH_TYPE = 2;
+ bits<1> FETCH_WHOLE_QUAD = 0;
+ bits<8> BUFFER_ID = buffer_id;
+ bits<1> SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ bits<2> SRC_SEL_X = 0;
+ bits<6> MEGA_FETCH_COUNT;
+ bits<1> DST_REL = 0;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
+ // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
+ // however, based on my testing if USE_CONST_FIELDS is set, then all
+ // these fields need to be set to 0.
+ bits<1> USE_CONST_FIELDS = 0;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL = 1;
+ bits<1> FORMAT_COMP_ALL = 0;
+ bits<1> SRF_MODE_ALL = 0;
+
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+ // VTX_WORD0
+ let Inst{4-0} = VC_INST;
+ let Inst{6-5} = FETCH_TYPE;
+ let Inst{7} = FETCH_WHOLE_QUAD;
+ let Inst{15-8} = BUFFER_ID;
+ let Inst{22-16} = SRC_GPR;
+ let Inst{23} = SRC_REL;
+ let Inst{25-24} = SRC_SEL_X;
+ let Inst{31-26} = MEGA_FETCH_COUNT;
+
+ // VTX_WORD1_GPR
+ let Inst{38-32} = DST_GPR;
+ let Inst{39} = DST_REL;
+ let Inst{40} = 0; // Reserved
+ let Inst{43-41} = DST_SEL_X;
+ let Inst{46-44} = DST_SEL_Y;
+ let Inst{49-47} = DST_SEL_Z;
+ let Inst{52-50} = DST_SEL_W;
+ let Inst{53} = USE_CONST_FIELDS;
+ let Inst{59-54} = DATA_FORMAT;
+ let Inst{61-60} = NUM_FORMAT_ALL;
+ let Inst{62} = FORMAT_COMP_ALL;
+ let Inst{63} = SRF_MODE_ALL;
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+ // is done in R600CodeEmitter
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+ // e.g.
+ // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ // %T2_X<def> = MOV %ZERO
+ //Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Constant Loads
+// XXX: We are currently storing all constants in the global address space.
+//===----------------------------------------------------------------------===//
+
+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+>;
+
+}
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : SIN_PAT <SIN_cm>;
+def : COS_PAT <COS_cm>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+
+// RECIP_UINT emulation for Cayman
+def : Pat <
+ (AMDGPUurecip R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
+ (MOV_IMM_I32 0x4f800000)))
+>;
+
+
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+
+} // End isCayman
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
+ "PREDICATED_BREAK $src", []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+def PRED_X : InstR600 <
+ 0, (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "", [], NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
+
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Pred:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+
+} // End isTerminator = 1, isBranch = 1, isBarrier = 1
+
+let usesCustomInserter = 1 in {
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
+
+def R600_LOAD_CONST : AMDGPUShaderInst <
+ (outs R600_Reg32:$dst),
+ (ins i32imm:$src0),
+ "R600_LOAD_CONST $dst, $src0",
+ [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+>;
+
+def RESERVE_REG : AMDGPUShaderInst <
+ (outs),
+ (ins i32imm:$src),
+ "RESERVE_REG $src",
+ [(int_AMDGPU_reserve_reg imm:$src)]
+>;
+
+def TXD: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+} // End isPseudo = 1
+} // End usesCustomInserter = 1
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+//===--------------------------------------------------------------------===//
+// Instructions support
+//===--------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Custom Inserter for Branches and returns, this eventually will be a
+// seperate pass
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+let isTerminator=1 in {
+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("SWITCH", " $src"), []>;
+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("CASE", " $src"), []>;
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+//CNDGE_INT extra pattern
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
+ (i32 R600_Reg32:$src2), COND_GT),
+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
+
+// SGT Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SGE Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// The next two patterns are special cases for handling 'true if ordered' and
+// 'true if unordered' conditionals. The assumption here is that the behavior of
+// SETE and SNE conforms to the Direct3D 10 rules for floating point values
+// described here:
+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
+// We assume that SETE returns false when one of the operands is NAN and
+// SNE returns true when on of the operands is NAN
+
+//SETE - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+//SNE - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
+
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
+
+def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+def : BitConvert <v4i32, v4f32, R600_Reg128>;
+
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
+} // End isR600toCayman Predicate
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
new file mode 100644
index 0000000000..3825bc4d3b
--- /dev/null
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -0,0 +1,32 @@
+//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_load_input_perspective :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_load_input_constant :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_load_input_linear :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_color :
+ Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_dummy :
+ Intrinsic<[], [], []>;
+}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
new file mode 100644
index 0000000000..4eb5efa19f
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -0,0 +1,34 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : MachineFunctionInfo(),
+ HasLinearInterpolation(false),
+ HasPerspectiveInterpolation(false) {
+ memset(Outputs, 0, sizeof(Outputs));
+ memset(StreamOutputs, 0, sizeof(StreamOutputs));
+ }
+
+unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
+ assert(HasPerspectiveInterpolation);
+ return 0;
+}
+
+unsigned R600MachineFunctionInfo::GetIJLinearIndex() const {
+ assert(HasLinearInterpolation);
+ if (HasPerspectiveInterpolation)
+ return 1;
+ else
+ return 0;
+}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
new file mode 100644
index 0000000000..e97fb5be62
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -0,0 +1,39 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public MachineFunctionInfo {
+
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ std::vector<unsigned> ReservedRegs;
+ SDNode *Outputs[16];
+ SDNode *StreamOutputs[64][4];
+ bool HasLinearInterpolation;
+ bool HasPerspectiveInterpolation;
+
+ unsigned GetIJLinearIndex() const;
+ unsigned GetIJPerspectiveIndex() const;
+
+};
+
+} // End llvm namespace
+
+#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
new file mode 100644
index 0000000000..a39f83dbac
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -0,0 +1,89 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
+ E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
+ E = MFI->ReservedRegs.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ case AMDGPU::GPRI32RegClassID:
+ return &AMDGPU::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ switch (Channel) {
+ default: assert(!"Invalid channel index"); return 0;
+ case 0: return AMDGPU::sel_x;
+ case 1: return AMDGPU::sel_y;
+ case 2: return AMDGPU::sel_z;
+ case 3: return AMDGPU::sel_w;
+ }
+}
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
new file mode 100644
index 0000000000..f9ca918f24
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -0,0 +1,55 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+namespace llvm {
+
+class R600TargetMachine;
+class TargetInstrInfo;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the R600 reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *getISARegClass(
+ const TargetRegisterClass *RC) const;
+
+ /// \brief get the HW encoding for a register's channel.
+ unsigned getHWRegChan(unsigned reg) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
new file mode 100644
index 0000000000..d3d6d25d29
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -0,0 +1,107 @@
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600RegWithChan <string name, bits<9> sel, string chan> :
+ Register <name> {
+
+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
+ !if(!eq(chan, "Y"), 1,
+ !if(!eq(chan, "Z"), 2,
+ !if(!eq(chan, "W"), 3, 0))));
+ let HWEncoding{8-0} = sel;
+ let HWEncoding{10-9} = chan_encoding;
+ let Namespace = "AMDGPU";
+}
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let HWEncoding = encoding;
+}
+
+foreach Index = 0-127 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+ // 32-bit Constant Registers (There are more than 128, this the number
+ // that is currently supported.
+ def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y"),
+ !cast<Register>("T"#Index#"_Z"),
+ !cast<Register>("T"#Index#"_W")],
+ Index>;
+}
+
+// Array Base Register holding input in FS
+foreach Index = 448-464 in {
+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
+}
+
+
+// Special Registers
+
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "ArrayBase%u", 448, 464))>;
+
+def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave (sequence "C%u_X", 0, 127),
+ (sequence "C%u_Z", 0, 127)),
+ (interleave (sequence "C%u_Y", 0, 127),
+ (sequence "C%u_W", 0, 127))))>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_X", 0, 127))>;
+
+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Y", 0, 127))>;
+
+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Z", 0, 127))>;
+
+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_W", 0, 127))>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (interleave
+ (interleave R600_TReg32_X, R600_TReg32_Z),
+ (interleave R600_TReg32_Y, R600_TReg32_W)))>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_CReg32,
+ R600_ArrayBase,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add (sequence "T%u_XYZW", 0, 127))> {
+ let CopyCost = -1;
+}
diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td
new file mode 100644
index 0000000000..7ede181c51
--- /dev/null
+++ b/lib/Target/R600/R600Schedule.td
@@ -0,0 +1,36 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
+// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
+// slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+
+def R600_EG_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
new file mode 100644
index 0000000000..f580377418
--- /dev/null
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -0,0 +1,329 @@
+//===-- SIAnnotateControlFlow.cpp - ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Annotates the control flow with hardware specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Complex types used in this pass
+typedef std::pair<BasicBlock *, Value *> StackEntry;
+typedef SmallVector<StackEntry, 16> StackVector;
+
+// Intrinsic names the control flow is annotated with
+static const char *IfIntrinsic = "llvm.SI.if";
+static const char *ElseIntrinsic = "llvm.SI.else";
+static const char *BreakIntrinsic = "llvm.SI.break";
+static const char *IfBreakIntrinsic = "llvm.SI.if.break";
+static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
+static const char *LoopIntrinsic = "llvm.SI.loop";
+static const char *EndCfIntrinsic = "llvm.SI.end.cf";
+
+class SIAnnotateControlFlow : public FunctionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ Type *Void;
+ Type *Int64;
+ Type *ReturnStruct;
+
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+ Constant *Int64Zero;
+
+ Constant *If;
+ Constant *Else;
+ Constant *Break;
+ Constant *IfBreak;
+ Constant *ElseBreak;
+ Constant *Loop;
+ Constant *EndCf;
+
+ DominatorTree *DT;
+ StackVector Stack;
+ SSAUpdater PhiInserter;
+
+ bool isTopOfStack(BasicBlock *BB);
+
+ Value *popSaved();
+
+ void push(BasicBlock *BB, Value *Saved);
+
+ bool isElse(PHINode *Phi);
+
+ void eraseIfUnused(PHINode *Phi);
+
+ void openIf(BranchInst *Term);
+
+ void insertElse(BranchInst *Term);
+
+ void handleLoopCondition(Value *Cond);
+
+ void handleLoop(BranchInst *Term);
+
+ void closeControlFlow(BasicBlock *BB);
+
+public:
+ SIAnnotateControlFlow():
+ FunctionPass(ID) { }
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "SI annotate control flow";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char SIAnnotateControlFlow::ID = 0;
+
+/// \brief Initialize all the types and constants used in the pass
+bool SIAnnotateControlFlow::doInitialization(Module &M) {
+ LLVMContext &Context = M.getContext();
+
+ Void = Type::getVoidTy(Context);
+ Boolean = Type::getInt1Ty(Context);
+ Int64 = Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
+
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+ Int64Zero = ConstantInt::get(Int64, 0);
+
+ If = M.getOrInsertFunction(
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
+
+ Else = M.getOrInsertFunction(
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
+
+ Break = M.getOrInsertFunction(
+ BreakIntrinsic, Int64, Int64, (Type *)0);
+
+ IfBreak = M.getOrInsertFunction(
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
+
+ ElseBreak = M.getOrInsertFunction(
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
+
+ Loop = M.getOrInsertFunction(
+ LoopIntrinsic, Boolean, Int64, (Type *)0);
+
+ EndCf = M.getOrInsertFunction(
+ EndCfIntrinsic, Void, Int64, (Type *)0);
+
+ return false;
+}
+
+/// \brief Is BB the last block saved on the stack ?
+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
+ return Stack.back().first == BB;
+}
+
+/// \brief Pop the last saved value from the control flow stack
+Value *SIAnnotateControlFlow::popSaved() {
+ return Stack.pop_back_val().second;
+}
+
+/// \brief Push a BB and saved value to the control flow stack
+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
+ Stack.push_back(std::make_pair(BB, Saved));
+}
+
+/// \brief Can the condition represented by this PHI node treated like
+/// an "Else" block?
+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ if (Phi->getIncomingBlock(i) == IDom) {
+
+ if (Phi->getIncomingValue(i) != BoolTrue)
+ return false;
+
+ } else {
+ if (Phi->getIncomingValue(i) != BoolFalse)
+ return false;
+
+ }
+ }
+ return true;
+}
+
+// \brief Erase "Phi" if it is not used any more
+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ if (!Phi->hasNUsesOrMore(1))
+ Phi->eraseFromParent();
+}
+
+/// \brief Open a new "If" block
+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Close the last "If" block and open a new "Else" block
+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Recursively handle the condition leading to a loop
+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+ if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+
+ // Handle all non constant incoming values first
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (isa<ConstantInt>(Incoming))
+ continue;
+
+ Phi->setIncomingValue(i, BoolFalse);
+ handleLoopCondition(Incoming);
+ }
+
+ BasicBlock *Parent = Phi->getParent();
+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
+
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (Incoming != BoolTrue)
+ continue;
+
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (From == IDom) {
+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
+ Value *Args[] = {
+ OldEnd->getArgOperand(0),
+ PhiInserter.GetValueAtEndOfBlock(Parent)
+ };
+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+ continue;
+ }
+ }
+
+ TerminatorInst *Insert = From->getTerminator();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
+ Value *Ret = CallInst::Create(Break, Arg, "", Insert);
+ PhiInserter.AddAvailableValue(From, Ret);
+ }
+ eraseIfUnused(Phi);
+
+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ BasicBlock *Parent = Inst->getParent();
+ TerminatorInst *Insert = Parent->getTerminator();
+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+
+ } else {
+ assert(0 && "Unhandled loop condition!");
+ }
+}
+
+/// \brief Handle a back edge (loop)
+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *Target = Term->getSuccessor(1);
+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+
+ PhiInserter.Initialize(Int64, "");
+ PhiInserter.AddAvailableValue(Target, Broken);
+
+ Value *Cond = Term->getCondition();
+ Term->setCondition(BoolTrue);
+ handleLoopCondition(Cond);
+
+ BasicBlock *BB = Term->getParent();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
+ PI != PE; ++PI) {
+
+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ }
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ push(Term->getSuccessor(0), Arg);
+}
+
+/// \brief Close the last opened control flow
+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+}
+
+/// \brief Annotate the control flow with intrinsics so the backend can
+/// recognize if/then/else and loops.
+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+
+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
+
+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+
+ if (!Term || Term->isUnconditional()) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ continue;
+ }
+
+ if (I.nodeVisited(Term->getSuccessor(1))) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ handleLoop(Term);
+ continue;
+ }
+
+ if (isTopOfStack(*I)) {
+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ insertElse(Term);
+ eraseIfUnused(Phi);
+ continue;
+ }
+ closeControlFlow(*I);
+ }
+ openIf(Term);
+ }
+
+ assert(Stack.empty());
+ return true;
+}
+
+/// \brief Create the annotation pass
+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
+ return new SIAnnotateControlFlow();
+}
diff --git a/lib/Target/R600/SIAssignInterpRegs.cpp b/lib/Target/R600/SIAssignInterpRegs.cpp
new file mode 100644
index 0000000000..832e44d766
--- /dev/null
+++ b/lib/Target/R600/SIAssignInterpRegs.cpp
@@ -0,0 +1,152 @@
+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass maps the pseudo interpolation registers to the correct physical
+/// registers.
+//
+/// Prior to executing a fragment shader, the GPU loads interpolation
+/// parameters into physical registers. The specific physical register that each
+/// interpolation parameter ends up in depends on the type of the interpolation
+/// parameter as well as how many interpolation parameters are used by the
+/// shader.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIAssignInterpRegsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+ void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+ unsigned physReg, unsigned virtReg);
+
+public:
+ SIAssignInterpRegsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "SI Assign intrpolation registers"; }
+};
+
+} // End anonymous namespace
+
+char SIAssignInterpRegsPass::ID = 0;
+
+#define INTERP_VALUES 16
+#define REQUIRED_VALUE_MAX_INDEX 7
+
+struct InterpInfo {
+ bool Enabled;
+ unsigned Regs[3];
+ unsigned RegCount;
+};
+
+
+FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
+ return new SIAssignInterpRegsPass(tm);
+}
+
+bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ struct InterpInfo InterpUse[INTERP_VALUES] = {
+ {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
+ {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
+ {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
+ {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
+ {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
+ {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
+ {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
+ {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
+ {false, {AMDGPU::POS_X_FLOAT}, 1},
+ {false, {AMDGPU::POS_Y_FLOAT}, 1},
+ {false, {AMDGPU::POS_Z_FLOAT}, 1},
+ {false, {AMDGPU::POS_W_FLOAT}, 1},
+ {false, {AMDGPU::FRONT_FACE}, 1},
+ {false, {AMDGPU::ANCILLARY}, 1},
+ {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
+ {false, {AMDGPU::POS_FIXED_PT}, 1}
+ };
+
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // This pass is only needed for pixel shaders.
+ if (MFI->ShaderType != ShaderType::PIXEL) {
+ return false;
+ }
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool ForceEnable = true;
+
+ // First pass, mark the interpolation values that are used.
+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
+ RegIdx++) {
+ InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
+ !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
+ if (InterpUse[InterpIdx].Enabled &&
+ InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
+ ForceEnable = false;
+ }
+ }
+ }
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (ForceEnable) {
+ InterpUse[0].Enabled = true;
+ }
+
+ unsigned UsedVgprs = 0;
+
+ // Second pass, replace with VGPRs.
+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
+ if (!InterpUse[InterpIdx].Enabled) {
+ continue;
+ }
+ MFI->SPIPSInputAddr |= (1 << InterpIdx);
+
+ for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
+ RegIdx++, UsedVgprs++) {
+ unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
+ unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
+ addLiveIn(&MF, MRI, NewReg, VirtReg);
+ }
+ }
+
+ return false;
+}
+
+void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
+ MachineRegisterInfo & MRI,
+ unsigned physReg, unsigned virtReg) {
+ const TargetInstrInfo * TII = TM.getInstrInfo();
+ if (!MRI.isLiveIn(physReg)) {
+ MRI.addLiveIn(physReg, virtReg);
+ MF->front().addLiveIn(physReg);
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), virtReg)
+ .addReg(physReg);
+ } else {
+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+ }
+}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
new file mode 100644
index 0000000000..4c672ca7c1
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -0,0 +1,511 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for SI
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::AND, MVT::i1, Custom);
+
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // We need to custom lower loads from the USER_SGPR address space, so we can
+ // add the SGPRs as livein registers.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setTargetDAGCombine(ISD::SETCC);
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
+ MachineBasicBlock::iterator I = MI;
+
+ if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
+ AppendS_WAITCNT(MI, *BB, llvm::next(I));
+ return BB;
+ }
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::BRANCH: return BB;
+ case AMDGPU::CLAMP_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // ABS
+ .addImm(1) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::FABS_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(1) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(0); // NEG
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::FNEG_SI:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ // VSRC1-2 are unused, but we still need to fill all the
+ // operand slots, so we just reuse the VSRC0 operand
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // ABS
+ .addImm(0) // CLAMP
+ .addImm(0) // OMOD
+ .addImm(1); // NEG
+ MI->eraseFromParent();
+ break;
+ case AMDGPU::SHADER_TYPE:
+ BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
+ MI->getOperand(0).getImm();
+ MI->eraseFromParent();
+ break;
+
+ case AMDGPU::SI_INTERP:
+ LowerSI_INTERP(MI, *BB, I, MRI);
+ break;
+ case AMDGPU::SI_INTERP_CONST:
+ LowerSI_INTERP_CONST(MI, *BB, I, MRI);
+ break;
+ case AMDGPU::SI_KIL:
+ LowerSI_KIL(MI, *BB, I, MRI);
+ break;
+ case AMDGPU::SI_WQM:
+ LowerSI_WQM(MI, *BB, I, MRI);
+ break;
+ case AMDGPU::SI_V_CNDLT:
+ LowerSI_V_CNDLT(MI, *BB, I, MRI);
+ break;
+ }
+ return BB;
+}
+
+void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const {
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+}
+
+
+void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
+ unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand iReg = MI->getOperand(1);
+ MachineOperand jReg = MI->getOperand(2);
+ MachineOperand attr_chan = MI->getOperand(3);
+ MachineOperand attr = MI->getOperand(4);
+ MachineOperand params = MI->getOperand(5);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
+ .addOperand(iReg)
+ .addOperand(attr_chan)
+ .addOperand(attr)
+ .addReg(M0);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
+ .addOperand(dst)
+ .addReg(tmp)
+ .addOperand(jReg)
+ .addOperand(attr_chan)
+ .addOperand(attr)
+ .addReg(M0);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
+ MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+ MachineRegisterInfo &MRI) const {
+ MachineOperand dst = MI->getOperand(0);
+ MachineOperand attr_chan = MI->getOperand(1);
+ MachineOperand attr = MI->getOperand(2);
+ MachineOperand params = MI->getOperand(3);
+ unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
+ .addOperand(params);
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
+ .addOperand(dst)
+ .addOperand(attr_chan)
+ .addOperand(attr)
+ .addReg(M0);
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
+ AMDGPU::VCC)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addOperand(MI->getOperand(0));
+
+ MI->eraseFromParent();
+}
+
+void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
+ unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ BuildMI(BB, I, BB.findDebugLoc(I),
+ TII->get(AMDGPU::V_CMP_GT_F32_e32),
+ VCC)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addOperand(MI->getOperand(1));
+
+ BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(2))
+ .addReg(VCC);
+
+ MI->eraseFromParent();
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_vs_load_buffer_index:
+ return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
+ AMDGPU::VGPR0, VT);
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// \brief The function is for lowering i1 operations on the
+/// VCC register.
+///
+/// In the VALU context, VCC is a one bit register, but in the
+/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
+/// the SALU can perform operations on the VCC register, we need to promote
+/// the operand types from i1 to i64 in order for tablegen to be able to match
+/// this operation to the correct SALU instruction. We do this promotion by
+/// wrapping the operands in a CopyToReg node.
+///
+SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned VCCNode) const {
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+ Op.getOperand(0)),
+ DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
+ Op.getOperand(1)));
+
+ return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
+}
+
+/// \brief Helper function for LowerBRCOND
+static SDNode *findUser(SDValue Value, unsigned Opcode) {
+
+ SDNode *Parent = Value.getNode();
+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
+ I != E; ++I) {
+
+ if (I.getUse().get() != Value)
+ continue;
+
+ if (I->getOpcode() == Opcode)
+ return *I;
+ }
+ return 0;
+}
+
+/// This transforms the control flow intrinsics to get the branch destination as
+/// last parameter, also switches branch target with BR if the need arise
+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = BRCOND.getDebugLoc();
+
+ SDNode *Intr = BRCOND.getOperand(1).getNode();
+ SDValue Target = BRCOND.getOperand(2);
+ SDNode *BR = 0;
+
+ if (Intr->getOpcode() == ISD::SETCC) {
+ // As long as we negate the condition everything is fine
+ SDNode *SetCC = Intr;
+ assert(SetCC->getConstantOperandVal(1) == 1);
+ assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
+ ISD::SETNE);
+ Intr = SetCC->getOperand(0).getNode();
+
+ } else {
+ // Get the target from BR if we don't negate the condition
+ BR = findUser(BRCOND, ISD::BR);
+ Target = BR->getOperand(1);
+ }
+
+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+
+ // Build the result and
+ SmallVector<EVT, 4> Res;
+ for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
+ Res.push_back(Intr->getValueType(i));
+
+ // operands of the new intrinsic call
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(BRCOND.getOperand(0));
+ for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
+ Ops.push_back(Intr->getOperand(i));
+ Ops.push_back(Target);
+
+ // build the new intrinsic call
+ SDNode *Result = DAG.getNode(
+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+
+ if (BR) {
+ // Give the branch instruction our target
+ SDValue Ops[] = {
+ BR->getOperand(0),
+ BRCOND.getOperand(2)
+ };
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ }
+
+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
+
+ // Copy the intrinsic results to registers
+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
+ if (!CopyToReg)
+ continue;
+
+ Chain = DAG.getCopyToReg(
+ Chain, DL,
+ CopyToReg->getOperand(1),
+ SDValue(Result, i - 1),
+ SDValue());
+
+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
+ }
+
+ // Remove the old intrinsic from the chain
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(Intr, Intr->getNumValues() - 1),
+ Intr->getOperand(0));
+
+ return Chain;
+}
+
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
+
+ assert(Ptr);
+
+ unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
+
+ // We only need to lower USER_SGPR address space loads
+ if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
+ return SDValue();
+ }
+
+ // Loads from the USER_SGPR address space can only have constant value
+ // pointers.
+ ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
+ assert(BasePtr);
+
+ unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
+ const TargetRegisterClass * dstClass;
+ switch (TypeDwordWidth) {
+ default:
+ assert(!"USER_SGPR value size not implemented");
+ return SDValue();
+ case 1:
+ dstClass = &AMDGPU::SReg_32RegClass;
+ break;
+ case 2:
+ dstClass = &AMDGPU::SReg_64RegClass;
+ break;
+ }
+ uint64_t Index = BasePtr->getZExtValue();
+ assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
+ unsigned SGPRIndex = Index / TypeDwordWidth;
+ unsigned Reg = dstClass->getRegister(SGPRIndex);
+
+ DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
+ VT));
+ return SDValue();
+}
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC: {
+ N->dump();
+ ConstantSDNode *True, *False;
+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ && True->isAllOnesValue()
+ && False->isNullValue()
+ && VT == MVT::i1) {
+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(4));
+
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Arg0 = N->getOperand(0);
+ SDValue Arg1 = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+ ConstantSDNode * C = NULL;
+ ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
+
+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+ if (VT == MVT::i1
+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
+ && Arg0.getOperand(0).getValueType() == MVT::i1
+ && (C = dyn_cast<ConstantSDNode>(Arg1))
+ && C->isNullValue()
+ && CCOp == ISD::SETNE) {
+ return SimplifySetCC(VT, Arg0.getOperand(0),
+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+#define NODE_NAME_CASE(node) case SIISD::node: return #node;
+
+const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
+ NODE_NAME_CASE(VCC_AND)
+ NODE_NAME_CASE(VCC_BITCAST)
+ }
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
new file mode 100644
index 0000000000..c088112652
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.h
@@ -0,0 +1,62 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering {
+ const SIInstrInfo * TII;
+
+ /// Memory reads and writes are syncronized using the S_WAITCNT instruction.
+ /// This function takes the most conservative approach and inserts an
+ /// S_WAITCNT instruction after every read and write.
+ void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I) const;
+ void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, unsigned Opocde) const;
+ void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
+ void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+ void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
+
+ SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
+ unsigned VCCNode) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+public:
+ SITargetLowering(TargetMachine &tm);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
new file mode 100644
index 0000000000..aea3b5a888
--- /dev/null
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -0,0 +1,146 @@
+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instruction that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
+//===----------------------------------------------------------------------===//
+
+class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass,
+ RegisterClass src0Class, RegisterClass src1Class,
+ list<dag> pattern>
+ : VOP3b <op, (outs dstClass:$vdst),
+ (ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst,
+ InstFlag:$omod, InstFlag:$neg),
+ opName, pattern
+>;
+
+
+class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern>
+ : VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>;
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
+ : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
+ : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
+class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
+ : SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
+class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOP1 <
+ op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
+ >;
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
+ def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+ def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
+ >;
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+
+ def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
+ def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
+ : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
+ : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
+
+class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> :
+ VOPC <
+ op, (ins arc:$src0, vrc:$src1), opName, pattern
+ >;
+
+multiclass VOPC_32 <bits<9> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOPC_Helper <
+ {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ VReg_32, AllReg_32, opName, pattern
+ >;
+
+ def _e64 : VOP3_1_32 <
+ op,
+ opName, pattern
+ >;
+}
+
+multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+
+ def _e64 : VOP3_64 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ opName, []
+ >;
+}
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
+ : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
new file mode 100644
index 0000000000..c6ad4d531d
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -0,0 +1,89 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ assert(AMDGPU::SReg_32RegClass.contains(DestReg));
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+}
+
+MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addImm(Imm);
+
+ return MI;
+
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_IMM_F32:
+ case AMDGPU::V_MOV_IMM_I32:
+ case AMDGPU::S_MOV_IMM_I32:
+ return true;
+ }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ return RC != &AMDGPU::EXECRegRegClass;
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
new file mode 100644
index 0000000000..631f6c00cc
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -0,0 +1,62 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ /// \returns the encoding type of this instruction.
+ unsigned getEncodingType(const MachineInstr &MI) const;
+
+ /// \returns the size of this instructions encoding in number of bytes.
+ unsigned getEncodingBytes(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ virtual bool isMov(unsigned Opcode) const;
+
+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+ };
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ NEED_WAIT = 1 << 4
+ };
+}
+
+#endif //SIINSTRINFO_H
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
new file mode 100644
index 0000000000..873a451e99
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -0,0 +1,589 @@
+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Profiles
+//===----------------------------------------------------------------------===//
+def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
+]>;
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// and operation on 64-bit wide vcc
+def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// Special bitcast node for sharing VCC register between VALU and SALU
+def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+>;
+
+// and operation on 64-bit wide vcc
+def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// Special bitcast node for sharing VCC register between VALU and SALU
+def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+>;
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<4> EncodingType = 0;
+ field bits<1> NeedWait = 0;
+
+ let TSFlags{3-0} = EncodingType;
+ let TSFlags{4} = NeedWait;
+
+}
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+}
+
+class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
+ let EncoderMethod = "encodeOperand";
+ let MIOperandInfo = opInfo;
+}
+
+def IMM16bit : ImmLeaf <
+ i16,
+ [{return isInt<16>(Imm);}]
+>;
+
+def IMM8bit : ImmLeaf <
+ i32,
+ [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
+>;
+
+def IMM12bit : ImmLeaf <
+ i16,
+ [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
+>;
+
+def IMM32bitIn64bit : ImmLeaf <
+ i64,
+ [{return isInt<32>(Imm);}]
+>;
+
+class GPR4Align <RegisterClass rc> : Operand <vAny> {
+ let EncoderMethod = "GPR4AlignEncode";
+ let MIOperandInfo = (ops rc:$reg);
+}
+
+class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
+ let EncoderMethod = "GPR2AlignEncode";
+ let MIOperandInfo = (ops rc:$reg);
+}
+
+def SMRDmemrr : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_64, SReg_32);
+ let EncoderMethod = "GPR2AlignEncode";
+}
+
+def SMRDmemri : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_64, i32imm);
+ let EncoderMethod = "SMRDmemriEncode";
+}
+
+def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
+def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
+
+let Uses = [EXEC] in {
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+ let EncodingType = 0; //SIInstrEncodingType::EXP
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<5> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{57-53} = SSAMP;
+
+ let EncodingType = 2; //SIInstrEncodingType::MIMG
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+ let EncodingType = 3; //SIInstrEncodingType::MTBUF
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+ let neverHasSideEffects = 1;
+}
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+ let EncodingType = 4; //SIInstrEncodingType::MUBUF
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+ let neverHasSideEffects = 1;
+}
+
+} // End Uses = [EXEC]
+
+class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<15> PTR;
+ bits<8> OFFSET = PTR{7-0};
+ bits<1> IMM = PTR{8};
+ bits<6> SBASE = PTR{14-9};
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = IMM;
+ let Inst{14-9} = SBASE;
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+ let EncodingType = 5; //SIInstrEncodingType::SMRD
+
+ let NeedWait = 1;
+ let usesCustomInserter = 1;
+}
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+ let Inst{31-23} = 0x17d; //encoding;
+ let EncodingType = 6; //SIInstrEncodingType::SOP1
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+ let EncodingType = 7; // SIInstrEncodingType::SOP2
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+ let EncodingType = 8; // SIInstrEncodingType::SOPC
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins , asm, pattern> {
+
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+ let EncodingType = 9; // SIInstrEncodingType::SOPK
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
+
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+ let EncodingType = 10; // SIInstrEncodingType::SOPP
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+let Uses = [EXEC] in {
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
+
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
+ let EncodingType = 11; // SIInstrEncodingType::VINTRP
+
+ let neverHasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let EncodingType = 12; // SIInstrEncodingType::VOP1
+ let PostEncoderMethod = "VOPPostEncode";
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let EncodingType = 13; // SIInstrEncodingType::VOP2
+ let PostEncoderMethod = "VOPPostEncode";
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let EncodingType = 14; // SIInstrEncodingType::VOP3
+ let PostEncoderMethod = "VOPPostEncode";
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<7> SDST;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{14-8} = SDST;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let EncodingType = 14; // SIInstrEncodingType::VOP3
+ let PostEncoderMethod = "VOPPostEncode";
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let EncodingType = 15; //SIInstrEncodingType::VOPC
+ let PostEncoderMethod = "VOPPostEncode";
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+} // End Uses = [EXEC]
+
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
+ GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
+ asm,
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
+ i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
+ asm,
+ []> {
+ let mayStore = 1;
+ let mayLoad = 0;
+}
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
+ ValueType vt> {
+ def _IMM : SMRD <
+ op,
+ (outs dstClass:$dst),
+ (ins SMRDmemri:$src0),
+ asm,
+ [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
+ >;
+
+ def _SGPR : SMRD <
+ op,
+ (outs dstClass:$dst),
+ (ins SMRDmemrr:$src0),
+ asm,
+ [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
+ >;
+}
+
+multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
+ defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
+ defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
+}
+
+include "SIInstrFormats.td"
+include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
new file mode 100644
index 0000000000..005be96645
--- /dev/null
+++ b/lib/Target/R600/SIInstructions.td
@@ -0,0 +1,1351 @@
+//===-- SIInstructions.td - SI Instruction Defintions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file was originally auto-generated from a GPU register header file and
+// all the instruction definitions were originally commented out. Instructions
+// that are not yet supported remain commented out.
+//===----------------------------------------------------------------------===//
+
+def isSI : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+let neverHasSideEffects = 1 in {
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+} // End neverHasSideEffects = 1
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
+
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+
+} // End hasSideEffects = 1
+
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach
+the instruction selector to correctly use the S_CMP* vs V_CMP*
+instructions.
+
+When this instruction is enabled the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+ "S_CMPK_EQ_I32",
+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
+
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
+ (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
+ (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
+ (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
+ (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+ (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
+ (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>;
+def : Pat <
+ (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+ (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
+
+//Side effect is writing to EXEC
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
+
+} // End hasSideEffects = 1
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
+
+//Side effect is writing to EXEC
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
+
+} // End hasSideEffects = 1
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
+ (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
+ (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
+ (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
+ (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
+ (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>;
+def : Pat <
+ (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
+ (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
+
+let hasSideEffects = 1 in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
+
+} // End hasSideEffects
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
+//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
+//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
+
+//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
+//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
+//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
+//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
+//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
+//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
+//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
+//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+let neverHasSideEffects = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+ [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
+ [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
+>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
+ [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
+ [(set VReg_32:$dst, (frint AllReg_32:$src0))]
+>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
+ [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
+>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
+ [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
+>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
+ [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))]
+>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+ 0x0000002d, "V_RSQ_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+ 0x00000000,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P1_F32",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+def V_INTERP_P2_F32 : VINTRP <
+ 0x00000001,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P2_F32",
+ []> {
+
+ let Constraints = "$src0 = $dst";
+ let DisableEncoding = "$src0,$m0";
+
+}
+
+def V_INTERP_MOV_F32 : VINTRP <
+ 0x00000002,
+ (outs VReg_32:$dst),
+ (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_MOV_F32",
+ []> {
+ let VSRC = 0;
+ let DisableEncoding = "$m0";
+}
+
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH",
+ [(br bb:$target)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ",
+ []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECZ",
+ []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECNZ",
+ []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+let hasSideEffects = 1 in {
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+} // End hasSideEffects
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32",
+ []
+>{
+ let DisableEncoding = "$vcc";
+}
+
+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+ "V_CNDMASK_B32_e64",
+ [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))]
+>;
+
+//f32 pattern for V_CNDMASK_B32_e64
+def : Pat <
+ (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)),
+ (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2)
+>;
+
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>;
+def : Pat <
+ (f32 (fadd AllReg_32:$src0, VReg_32:$src1)),
+ (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>;
+def : Pat <
+ (f32 (fsub AllReg_32:$src0, VReg_32:$src1)),
+ (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+ 0x00000007, "V_MUL_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
+ [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
+>;
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
+>;
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+let Defs = [VCC] in { // Carry-out goes to VCC
+defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
+ [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+>;
+defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
+ [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+>;
+} // End Defs = [VCC]
+defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
+defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+ (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
+>;
+def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
+ [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
+>;
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
+ (outs VReg_32:$dst),
+ (ins immType:$src0),
+ "V_MOV_IMM",
+ [(set VReg_32:$dst, (immNode:$src0))]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
+def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
+
+def S_MOV_IMM_I32 : InstSI <
+ (outs SReg_32:$dst),
+ (ins i32imm:$src0),
+ "S_MOV_IMM_I32",
+ [(set SReg_32:$dst, (imm:$src0))]
+>;
+
+// i64 immediates aren't really supported in hardware, but LLVM will use the i64
+// type for indices on load and store instructions. The pattern for
+// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
+// which the hardware can handle.
+def S_MOV_IMM_I64 : InstSI <
+ (outs SReg_64:$dst),
+ (ins i64imm:$src0),
+ "S_MOV_IMM_I64 $dst, $src0",
+ [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
+>;
+
+} // End isCodeGenOnly, isPseudo = 1
+
+class SI_LOAD_LITERAL<Operand ImmType> :
+ Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
+
+ bits<32> imm;
+ let Inst{31-0} = imm;
+}
+
+def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
+def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def SET_M0 : InstSI <
+ (outs SReg_32:$dst),
+ (ins i32imm:$src0),
+ "SET_M0",
+ [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
+>;
+
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+let usesCustomInserter = 1 in {
+
+def SI_V_CNDLT : InstSI <
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ "SI_V_CNDLT $dst, $src0, $src1, $src2",
+ [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
+>;
+
+def SI_INTERP : InstSI <
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+ "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
+ []
+>;
+
+def SI_INTERP_CONST : InstSI <
+ (outs VReg_32:$dst),
+ (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
+ "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
+ [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
+ imm:$attr, SReg_32:$params))]
+>;
+
+def SI_KIL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
+def SI_WQM : InstSI <
+ (outs),
+ (ins),
+ "SI_WQM",
+ [(int_SI_wqm)]
+>;
+
+} // end usesCustomInserter
+
+// SI Psuedo instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
+ Uses = [EXEC], Defs = [EXEC] in {
+
+let isBranch = 1, isTerminator = 1 in {
+
+def SI_IF : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_1:$vcc, brtarget:$target),
+ "SI_IF",
+ [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))]
+>;
+
+def SI_ELSE : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "SI_ELSE",
+ [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+
+ let Constraints = "$src = $dst";
+}
+
+def SI_LOOP : InstSI <
+ (outs),
+ (ins SReg_64:$saved, brtarget:$target),
+ "SI_LOOP",
+ [(int_SI_loop SReg_64:$saved, bb:$target)]
+>;
+
+} // end isBranch = 1, isTerminator = 1
+
+def SI_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src),
+ "SI_ELSE",
+ [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+>;
+
+def SI_IF_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_1:$vcc, SReg_64:$src),
+ "SI_IF_BREAK",
+ [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))]
+>;
+
+def SI_ELSE_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "SI_ELSE_BREAK",
+ [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+>;
+
+def SI_END_CF : InstSI <
+ (outs),
+ (ins SReg_64:$saved),
+ "SI_END_CF",
+ [(int_SI_end_cf SReg_64:$saved)]
+>;
+
+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+ // Uses = [EXEC], Defs = [EXEC]
+
+} // end IsCodeGenOnly, isPseudo
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+ VReg_32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+ 0, 0, (i32 SREG_LIT_0))
+>;
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+ VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
+
+/* int_SI_sample */
+def : Pat <
+ (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample_lod */
+def : Pat <
+ (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
+ (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample_bias */
+def : Pat <
+ (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
+ (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+def CLAMP_SI : CLAMP<VReg_32>;
+def FABS_SI : FABS<VReg_32>;
+def FNEG_SI : FNEG<VReg_32>;
+
+def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
+def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
+def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
+def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
+
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
+
+def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
+def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+def : Pat <
+ (i64 (SIsreg1_bitcast SReg_1:$vcc)),
+ (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
+>;
+
+def : Pat <
+ (i1 (SIsreg1_bitcast SReg_64:$vcc)),
+ (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
+>;
+
+def : Pat <
+ (i64 (SIvcc_bitcast VCCReg:$vcc)),
+ (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
+>;
+
+def : Pat <
+ (i1 (SIvcc_bitcast SReg_64:$vcc)),
+ (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
+>;
+
+/********** ===================== **********/
+/********** Interpolation Paterns **********/
+/********** ===================== **********/
+
+def : Pat <
+ (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
+ imm:$attr, SReg_32:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_read_face),
+ (f32 FRONT_FACE)
+>;
+
+def : Pat <
+ (int_SI_fs_read_pos 0),
+ (f32 POS_X_FLOAT)
+>;
+
+def : Pat <
+ (int_SI_fs_read_pos 1),
+ (f32 POS_Y_FLOAT)
+>;
+
+def : Pat <
+ (int_SI_fs_read_pos 2),
+ (f32 POS_Z_FLOAT)
+>;
+
+def : Pat <
+ (int_SI_fs_read_pos 3),
+ (f32 POS_W_FLOAT)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
+
+def : Pat <
+ (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
+ (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
+>;
+
+def : Pat<
+ (fdiv AllReg_32:$src0, AllReg_32:$src1),
+ (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
+>;
+
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KIL (V_MOV_IMM_I32 0xbf800000))
+>;
+
+def : Pat <
+ (int_AMDGPU_cube VReg_128:$src),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
+ 0, 0, 0, 0), sel_x),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
+ 0, 0, 0, 0), sel_y),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
+ 0, 0, 0, 0), sel_z),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
+ (EXTRACT_SUBREG VReg_128:$src, sel_y),
+ (EXTRACT_SUBREG VReg_128:$src, sel_z),
+ 0, 0, 0, 0), sel_w)
+>;
+
+/********** ================== **********/
+/********** VOP3 Patterns **********/
+/********** ================== **********/
+
+def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)),
+ (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
+ 0, 0, 0, 0)>;
+
+} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
new file mode 100644
index 0000000000..c322fef0fe
--- /dev/null
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -0,0 +1,52 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ /* XXX: We may need a seperate intrinsic here for loading integer values */
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+ def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
+ def int_SI_wqm : Intrinsic <[], [], []>;
+
+ def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+ def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+ def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
+ class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
+
+ def int_SI_fs_interp_linear_center : Interp;
+ def int_SI_fs_interp_linear_centroid : Interp;
+ def int_SI_fs_interp_persp_center : Interp;
+ def int_SI_fs_interp_persp_centroid : Interp;
+ def int_SI_fs_interp_constant : Interp;
+
+ def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
+ def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ /* Control flow Intrinsics */
+
+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+}
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
new file mode 100644
index 0000000000..3fbe653a49
--- /dev/null
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -0,0 +1,331 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers the pseudo control flow instructions to real
+/// machine instructions.
+///
+/// All control flow is handled using predicated instructions and
+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+/// by writting to the 64-bit EXEC register (each bit corresponds to a
+/// single vector ALU). Typically, for predicates, a vector ALU will write
+/// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
+/// Vector ALU) and then the ScalarALU will AND the VCC register with the
+/// EXEC to update the predicates.
+///
+/// For example:
+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+/// %SGPR0 = SI_IF %VCC
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+/// %SGPR0 = SI_ELSE %SGPR0
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+/// SI_END_CF %SGPR0
+///
+/// becomes:
+///
+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
+/// // optimization which allows us to
+/// // branch if all the bits of
+/// // EXEC are zero.
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+///
+/// label0:
+/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_BRANCH_EXECZ label1 // Use our branch optimization
+/// // instruction again.
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
+/// label1:
+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerControlFlowPass : public MachineFunctionPass {
+
+private:
+ static const unsigned SkipThreshold = 12;
+
+ static char ID;
+ const TargetInstrInfo *TII;
+
+ void Skip(MachineInstr &MI, MachineOperand &To);
+
+ void If(MachineInstr &MI);
+ void Else(MachineInstr &MI);
+ void Break(MachineInstr &MI);
+ void IfBreak(MachineInstr &MI);
+ void ElseBreak(MachineInstr &MI);
+ void Loop(MachineInstr &MI);
+ void EndCf(MachineInstr &MI);
+
+ void Branch(MachineInstr &MI);
+
+public:
+ SILowerControlFlowPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI Lower control flow instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SILowerControlFlowPass::ID = 0;
+
+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
+ return new SILowerControlFlowPass(tm);
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+ unsigned NumInstr = 0;
+
+ for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
+ NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
+ MBB = *MBB->succ_begin()) {
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+
+ if (I->isBundle() || !I->isBundled())
+ ++NumInstr;
+ }
+ }
+
+ if (NumInstr < SkipThreshold)
+ return;
+
+ DebugLoc DL = From.getDebugLoc();
+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addOperand(To)
+ .addReg(AMDGPU::EXEC);
+}
+
+void SILowerControlFlowPass::If(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
+ .addReg(Vcc);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Else(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ .addReg(Src); // Saved EXEC
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Break(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Vcc)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Saved = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Saved)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Src = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::EXEC);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+ MachineBasicBlock *Next = MI.getParent()->getNextNode();
+ MachineBasicBlock *Target = MI.getOperand(0).getMBB();
+ if (Target == Next)
+ MI.eraseFromParent();
+ else
+ assert(0);
+}
+
+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
+ bool HaveCf = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_IF:
+ If(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ Else(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ Break(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ IfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ ElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ Loop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ HaveCf = true;
+ EndCf(MI);
+ break;
+
+ case AMDGPU::S_BRANCH:
+ Branch(MI);
+ break;
+ }
+ }
+ }
+
+ // TODO: What is this good for?
+ unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
+ if (HaveCf && ShaderType == ShaderType::PIXEL) {
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ if (MBB.succ_empty()) {
+
+ MachineInstr &MI = *MBB.getFirstNonPHI();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0)
+ .addReg(AMDGPU::SREG_LIT_0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
+ }
+ }
+ }
+
+ return true;
+}
diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp
new file mode 100644
index 0000000000..c0411e9b4d
--- /dev/null
+++ b/lib/Target/R600/SILowerLiteralConstants.cpp
@@ -0,0 +1,108 @@
+//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass performs the following transformation on instructions with
+/// literal constants:
+///
+/// %VGPR0 = V_MOV_IMM_I32 1
+///
+/// becomes:
+///
+/// BUNDLE
+/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
+/// * SI_LOAD_LITERAL 1
+///
+/// The resulting sequence matches exactly how the hardware handles immediate
+/// operands, so this transformation greatly simplifies the code generator.
+///
+/// Only the *_MOV_IMM_* support immediate operands at the moment, but when
+/// support for immediate operands is added to other instructions, they
+/// will be lowered here as well.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerLiteralConstantsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const TargetInstrInfo *TII;
+
+public:
+ SILowerLiteralConstantsPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI Lower literal constants pass";
+ }
+};
+
+} // End anonymous namespace
+
+char SILowerLiteralConstantsPass::ID = 0;
+
+FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
+ return new SILowerLiteralConstantsPass(tm);
+}
+
+bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::S_MOV_IMM_I32:
+ case AMDGPU::S_MOV_IMM_I64:
+ case AMDGPU::V_MOV_IMM_F32:
+ case AMDGPU::V_MOV_IMM_I32: {
+ unsigned MovOpcode;
+ unsigned LoadLiteralOpcode;
+ MachineOperand LiteralOp = MI.getOperand(1);
+ if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
+ MovOpcode = AMDGPU::V_MOV_B32_e32;
+ } else {
+ MovOpcode = AMDGPU::S_MOV_B32;
+ }
+ if (LiteralOp.isImm()) {
+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
+ } else {
+ LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
+ }
+ MIBundleBuilder Bundle(MBB, I);
+ Bundle
+ .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode),
+ MI.getOperand(0).getReg())
+ .addReg(AMDGPU::SI_LITERAL_CONSTANT))
+ .append(BuildMI(MF, MBB.findDebugLoc(I),
+ TII->get(LoadLiteralOpcode))
+ .addOperand(MI.getOperand(1)));
+ llvm::finalizeBundle(MBB, Bundle.begin());
+ MI.eraseFromParent();
+ break;
+ }
+ }
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
new file mode 100644
index 0000000000..7e59b42749
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -0,0 +1,20 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : MachineFunctionInfo(),
+ SPIPSInputAddr(0),
+ ShaderType(0)
+ { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
new file mode 100644
index 0000000000..47271f5a1e
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIMACHINEFUNCTIONINFO_H_
+#define SIMACHINEFUNCTIONINFO_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo : public MachineFunctionInfo {
+public:
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ unsigned SPIPSInputAddr;
+ unsigned ShaderType;
+};
+
+} // End namespace llvm
+
+
+#endif //_SIMACHINEFUNCTIONINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
new file mode 100644
index 0000000000..88275c523f
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -0,0 +1,48 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ return &AMDGPU::VReg_32RegClass;
+ default: return rc;
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ }
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
new file mode 100644
index 0000000000..40171e4450
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -0,0 +1,47 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the SI register class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass *RC) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
new file mode 100644
index 0000000000..c3f136191a
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -0,0 +1,167 @@
+
+let Namespace = "AMDGPU" in {
+ def low : SubRegIndex;
+ def high : SubRegIndex;
+
+ def sub0 : SubRegIndex;
+ def sub1 : SubRegIndex;
+ def sub2 : SubRegIndex;
+ def sub3 : SubRegIndex;
+ def sub4 : SubRegIndex;
+ def sub5 : SubRegIndex;
+ def sub6 : SubRegIndex;
+ def sub7 : SubRegIndex;
+}
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [low, high];
+ let HWEncoding = encoding;
+}
+
+class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
+
+// Special Registers
+def VCC : SIReg<"VCC", 106>;
+def EXEC_LO : SIReg <"EXEC LO", 126>;
+def EXEC_HI : SIReg <"EXEC HI", 127>;
+def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
+def SCC : SIReg<"SCC", 253>;
+def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
+def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>;
+def M0 : SIReg <"M0", 124>;
+
+//Interpolation registers
+def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
+def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
+def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
+def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
+def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
+def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
+def PERSP_I_W : SIReg <"PERSP_I_W">;
+def PERSP_J_W : SIReg <"PERSP_J_W">;
+def PERSP_1_W : SIReg <"PERSP_1_W">;
+def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
+def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
+def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
+def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
+def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
+def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
+def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
+def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
+def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
+def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
+def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
+def FRONT_FACE : SIReg <"FRONT_FACE">;
+def ANCILLARY : SIReg <"ANCILLARY">;
+def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
+def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
+
+// SGPR 32-bit registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
+}
+
+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, 101))>;
+
+// SGPR 64-bit registers
+def SGPR_64 : RegisterTuples<[low, high],
+ [(add (decimate SGPR_32, 2)),
+ (add(decimate (rotl SGPR_32, 1), 2))]>;
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+ [(add (decimate SGPR_32, 4)),
+ (add (decimate (rotl SGPR_32, 1), 4)),
+ (add (decimate (rotl SGPR_32, 2), 4)),
+ (add (decimate (rotl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (decimate SGPR_32, 8)),
+ (add (decimate (rotl SGPR_32, 1), 8)),
+ (add (decimate (rotl SGPR_32, 2), 8)),
+ (add (decimate (rotl SGPR_32, 3), 8)),
+ (add (decimate (rotl SGPR_32, 4), 8)),
+ (add (decimate (rotl SGPR_32, 5), 8)),
+ (add (decimate (rotl SGPR_32, 6), 8)),
+ (add (decimate (rotl SGPR_32, 7), 8))]>;
+
+// VGPR 32-bit registers
+foreach Index = 0-255 in {
+ def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
+}
+
+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[low, high],
+ [(add VGPR_32),
+ (add (rotl VGPR_32, 1))]>;
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
+ [(add VGPR_32),
+ (add (rotl VGPR_32, 1)),
+ (add (rotl VGPR_32, 2)),
+ (add (rotl VGPR_32, 3))]>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
+>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>;
+
+def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
+
+// Register class for all vector registers (VGPRs + Interploation Registers)
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add VGPR_32,
+ PERSP_SAMPLE_I, PERSP_SAMPLE_J,
+ PERSP_CENTER_I, PERSP_CENTER_J,
+ PERSP_CENTROID_I, PERSP_CENTROID_J,
+ PERSP_I_W, PERSP_J_W, PERSP_1_W,
+ LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
+ LINEAR_CENTER_I, LINEAR_CENTER_J,
+ LINEAR_CENTROID_I, LINEAR_CENTROID_J,
+ LINE_STIPPLE_TEX_COORD,
+ POS_X_FLOAT,
+ POS_Y_FLOAT,
+ POS_Z_FLOAT,
+ POS_W_FLOAT,
+ FRONT_FACE,
+ ANCILLARY,
+ SAMPLE_COVERAGE,
+ POS_FIXED_PT
+ )
+>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>;
+
+// AllReg_* - A set of all scalar and vector registers of a given width.
+def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;
+
+def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>;
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
new file mode 100644
index 0000000000..28b65b8258
--- /dev/null
+++ b/lib/Target/R600/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a place holder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 0000000000..46b1f18c62
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// \brief The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// \brief Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeR600TargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
diff --git a/lib/Target/R600/TargetInfo/CMakeLists.txt b/lib/Target/R600/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000000..3d1584eba3
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600Info
+ AMDGPUTargetInfo.cpp
+ )
+
+add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/R600/TargetInfo/LLVMBuild.txt b/lib/Target/R600/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000000..4c6fea4aa0
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Info
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
diff --git a/lib/Target/R600/TargetInfo/Makefile b/lib/Target/R600/TargetInfo/Makefile
new file mode 100644
index 0000000000..b8ac4e7823
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AMDGPU/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 8165f5b8cc..a9aab86abd 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -262,22 +262,7 @@ unsigned countbits_slow(unsigned v) {
c += v & 1;
return c;
}
-unsigned countbits_fast(unsigned v){
- unsigned c;
- for (c = 0; v; c++)
- v &= v - 1; // clear the least significant bit set
- return c;
-}
-BITBOARD = unsigned long long
-int PopCnt(register BITBOARD a) {
- register int c=0;
- while(a) {
- c++;
- a &= a - 1;
- }
- return c;
-}
unsigned int popcount(unsigned int input) {
unsigned int count = 0;
for (unsigned int i = 0; i < 4 * 8; i++)
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 874a5d5c51..6c47c70222 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 5beed7635f..5fa545d301 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -13,7 +13,7 @@
#include "SparcTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index bc6e8ddfd7..168640f457 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -22,9 +22,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 303e46213e..9c1c30b9d3 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -19,9 +19,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
#define GET_REGINFO_TARGET_DESC
#include "SparcGenRegisterInfo.inc"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 7c628d7c55..60bceb708f 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 53cf276a3e..081075de2d 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -19,10 +19,9 @@
#include "SparcInstrInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -55,12 +52,6 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index c9d5b7bdfb..bb71463234 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Sparc.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 8571bc0fa1..9a78ebc3fa 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,9 +14,9 @@
#include "llvm-c/Target.h"
#include "llvm-c/Initialization.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
-#include "llvm/LLVMContext.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <cstring>
@@ -26,7 +26,6 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
- initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index 8e8fd93801..64bd56f6e7 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
TargetIntrinsicInfo::TargetIntrinsicInfo() {
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 1ca47d023f..f5121e34f7 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index efc873d55a..6af040936a 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 10c770b135..1e4c195b0f 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -14,8 +14,8 @@
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
deleted file mode 100644
index 7f9fdfc938..0000000000
--- a/lib/Target/TargetTransformImpl.cpp
+++ /dev/null
@@ -1,372 +0,0 @@
-// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetLowering.h"
-#include <utility>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by scalar transformations.
-//
-//===----------------------------------------------------------------------===//
-
-bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
- return TLI->isLegalAddImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
- return TLI->isLegalICmpImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- return TLI->isLegalAddressingMode(AM, Ty);
-}
-
-bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return TLI->isTruncateFree(Ty1, Ty2);
-}
-
-bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
- EVT T = TLI->getValueType(Ty);
- return TLI->isTypeLegal(T);
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
- return TLI->getJumpBufAlignment();
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
- return TLI->getJumpBufSize();
-}
-
-bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
- return TLI->supportJumpTables() &&
- (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by the vectorizers.
-//
-//===----------------------------------------------------------------------===//
-int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
- enum InstructionOpcodes {
-#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
-#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
-#include "llvm/Instruction.def"
- };
- switch (static_cast<InstructionOpcodes>(Opcode)) {
- case Ret: return 0;
- case Br: return 0;
- case Switch: return 0;
- case IndirectBr: return 0;
- case Invoke: return 0;
- case Resume: return 0;
- case Unreachable: return 0;
- case Add: return ISD::ADD;
- case FAdd: return ISD::FADD;
- case Sub: return ISD::SUB;
- case FSub: return ISD::FSUB;
- case Mul: return ISD::MUL;
- case FMul: return ISD::FMUL;
- case UDiv: return ISD::UDIV;
- case SDiv: return ISD::UDIV;
- case FDiv: return ISD::FDIV;
- case URem: return ISD::UREM;
- case SRem: return ISD::SREM;
- case FRem: return ISD::FREM;
- case Shl: return ISD::SHL;
- case LShr: return ISD::SRL;
- case AShr: return ISD::SRA;
- case And: return ISD::AND;
- case Or: return ISD::OR;
- case Xor: return ISD::XOR;
- case Alloca: return 0;
- case Load: return ISD::LOAD;
- case Store: return ISD::STORE;
- case GetElementPtr: return 0;
- case Fence: return 0;
- case AtomicCmpXchg: return 0;
- case AtomicRMW: return 0;
- case Trunc: return ISD::TRUNCATE;
- case ZExt: return ISD::ZERO_EXTEND;
- case SExt: return ISD::SIGN_EXTEND;
- case FPToUI: return ISD::FP_TO_UINT;
- case FPToSI: return ISD::FP_TO_SINT;
- case UIToFP: return ISD::UINT_TO_FP;
- case SIToFP: return ISD::SINT_TO_FP;
- case FPTrunc: return ISD::FP_ROUND;
- case FPExt: return ISD::FP_EXTEND;
- case PtrToInt: return ISD::BITCAST;
- case IntToPtr: return ISD::BITCAST;
- case BitCast: return ISD::BITCAST;
- case ICmp: return ISD::SETCC;
- case FCmp: return ISD::SETCC;
- case PHI: return 0;
- case Call: return 0;
- case Select: return ISD::SELECT;
- case UserOp1: return 0;
- case UserOp2: return 0;
- case VAArg: return 0;
- case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
- case InsertElement: return ISD::INSERT_VECTOR_ELT;
- case ShuffleVector: return ISD::VECTOR_SHUFFLE;
- case ExtractValue: return ISD::MERGE_VALUES;
- case InsertValue: return ISD::MERGE_VALUES;
- case LandingPad: return 0;
- }
-
- llvm_unreachable("Unknown instruction type encountered!");
-}
-
-std::pair<unsigned, MVT>
-VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
-
- LLVMContext &C = Ty->getContext();
- EVT MTy = TLI->getValueType(Ty);
-
- unsigned Cost = 1;
- // We keep legalizing the type until we find a legal kind. We assume that
- // the only operation that costs anything is the split. After splitting
- // we need to handle two types.
- while (true) {
- TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy);
-
- if (LK.first == TargetLowering::TypeLegal)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- if (LK.first == TargetLowering::TypeSplitVector ||
- LK.first == TargetLowering::TypeExpandInteger)
- Cost *= 2;
-
- // Keep legalizing the type.
- MTy = LK.second;
- }
-}
-
-unsigned
-VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
- bool Insert,
- bool Extract) const {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
-unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Check if any of the operands are vector operands.
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Else, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst);
-
- // Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- // Just check the op cost. If the operation is legal then assume it costs 1.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return 1;
-
- // Assume that illegal scalar instruction are expensive.
- return 4;
- }
-
- // Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
-
- // If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
-
- // Assume that Zext is done using AND.
- if (Opcode == Instruction::ZExt)
- return 1;
-
- // Assume that sext is done using SHL and SRA.
- if (Opcode == Instruction::SExt)
- return 2;
-
- // Just check the op cost. If the operation is legal then assume it costs
- // 1 and multiply by the type-legalization overhead.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return SrcLT.first * 1;
- }
-
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
-
- // Return the cost of multiple scalar invocation plus the cost of
- // inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
- }
-
- // We already handled vector-to-vector and scalar-to-scalar conversions. This
- // is where we handle bitcast between vectors and scalars. We need to assume
- // that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
- // Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
- (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
-
- llvm_unreachable("Unhandled cast");
- }
-
-unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
- return 0;
-}
-
-unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- // Selects on vectors are actually vector selects.
- if (ISD == ISD::SELECT) {
- assert(CondTy && "CondTy must exist");
- if (CondTy->isVectorTy())
- ISD = ISD::VSELECT;
- }
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Otherwise, assume that the cast is scalarized.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
- if (CondTy)
- CondTy = CondTy->getScalarType();
- unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- CondTy);
-
- // Return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
- }
-
- // Unknown scalar opcode.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
- Type *Val,
- unsigned Index) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
- Type *Ty2) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
-
- // Assume that all loads of legal types cost 1.
- return LT.first;
-}
-
-unsigned
-VectorTargetTransformImpl::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
- ArrayRef<Type*> Tys) const {
- // assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- }
- return ScalarCalls + ScalarizationCost;
-}
-
-unsigned
-VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
- return LT.first;
-}
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 47489bb06c..54204d4b63 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMX86AsmParser
- X86AsmLexer.cpp
X86AsmParser.cpp
)
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
deleted file mode 100644
index b12399d447..0000000000
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-namespace {
-
-class X86AsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- bool tentativeIsValid;
- AsmToken tentativeToken;
-
- const AsmToken &lexTentative() {
- tentativeToken = getLexer()->Lex();
- tentativeIsValid = true;
- return tentativeToken;
- }
-
- const AsmToken &lexDefinite() {
- if (tentativeIsValid) {
- tentativeIsValid = false;
- return tentativeToken;
- }
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenATT();
- AsmToken LexTokenIntel();
-protected:
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenATT();
- case 1:
- return LexTokenIntel();
- }
- }
-public:
- X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
- }
-};
-
-} // end anonymous namespace
-
-#define GET_REGISTER_MATCHER
-#include "X86GenAsmMatcher.inc"
-
-AsmToken X86AsmLexer::LexTokenATT() {
- AsmToken lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
-
- case AsmToken::Percent: {
- const AsmToken &nextToken = lexTentative();
- if (nextToken.getKind() != AsmToken::Identifier)
- return lexedToken;
-
- if (unsigned regID = MatchRegisterName(nextToken.getString())) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
-
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(regID));
- }
-
- // Match register name failed. If this is "db[0-7]", match it as an alias
- // for dr[0-7].
- if (nextToken.getString().size() == 3 &&
- nextToken.getString().startswith("db")) {
- int RegNo = -1;
- switch (nextToken.getString()[2]) {
- case '0': RegNo = X86::DR0; break;
- case '1': RegNo = X86::DR1; break;
- case '2': RegNo = X86::DR2; break;
- case '3': RegNo = X86::DR3; break;
- case '4': RegNo = X86::DR4; break;
- case '5': RegNo = X86::DR5; break;
- case '6': RegNo = X86::DR6; break;
- case '7': RegNo = X86::DR7; break;
- }
-
- if (RegNo != -1) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(RegNo));
- }
- }
-
-
- return lexedToken;
- }
- }
-}
-
-AsmToken X86AsmLexer::LexTokenIntel() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch(lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
- case AsmToken::Identifier: {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- return lexedToken;
- }
- }
-}
-
-extern "C" void LLVMInitializeX86AsmLexer() {
- RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
- RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
-}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index fdb7583ed0..5ce258ed0f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
+ bool AddressOf;
union {
struct {
@@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand {
return OffsetOfLoc.getPointer();
}
+ bool needAddressOf() const {
+ return AddressOf;
+ }
+
bool needSizeDirective() const {
assert(Kind == Memory && "Invalid access!");
return Mem.NeedSizeDir;
@@ -463,7 +468,7 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
- SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
@@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
@@ -488,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false){
+ unsigned Size = 0, bool NeedSizeDir = false) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -497,6 +504,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
@@ -520,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
};
@@ -558,10 +567,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
Parser.Lex(); // Eat percent token.
const AsmToken &Tok = Parser.getTok();
+ EndLoc = Tok.getEndLoc();
+
if (Tok.isNot(AsmToken::Identifier)) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
@@ -582,13 +593,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
X86II::isX86_64ExtendedReg(RegNo))
return Error(StartLoc, "register %"
+ Tok.getString() + " is only available in 64-bit mode",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
- EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
@@ -615,11 +625,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
return false;
}
+ EndLoc = Parser.getTok().getEndLoc();
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -636,7 +648,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
}
if (RegNo != 0) {
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
@@ -645,10 +657,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
- EndLoc = Tok.getEndLoc();
Parser.Lex(); // Eat identifier token.
return false;
}
@@ -677,7 +688,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End;
+ SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
// Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
@@ -693,9 +704,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
// Handle '[' 'symbol' ']'
if (getParser().ParseExpression(Disp, End)) return 0;
if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(Start, "Expected ']' token!");
+ return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
+ End = Parser.getTok().getEndLoc();
Parser.Lex();
- End = Tok.getLoc();
return X86Operand::CreateMem(Disp, Start, End, Size);
}
} else if (getLexer().is(AsmToken::Integer)) {
@@ -704,8 +715,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
SMLoc Loc = Tok.getLoc();
if (getLexer().is(AsmToken::RBrac)) {
// Handle '[' number ']'
+ End = Parser.getTok().getEndLoc();
Parser.Lex();
- End = Tok.getLoc();
const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
if (SegReg)
return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
@@ -726,8 +737,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
bool ExpectRBrac = true;
if (getLexer().is(AsmToken::RBrac)) {
ExpectRBrac = false;
+ End = Parser.getTok().getEndLoc();
Parser.Lex();
- End = Tok.getLoc();
}
if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
@@ -753,18 +764,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
return ErrorOperand(PlusLoc, "unexpected token after +");
} else if (getLexer().is(AsmToken::Identifier)) {
// This could be an index register or a displacement expression.
- End = Tok.getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ else if (getParser().ParseExpression(Disp, End))
+ return 0;
}
}
// Parse ][ as a plus.
if (getLexer().is(AsmToken::RBrac)) {
ExpectRBrac = false;
+ End = Parser.getTok().getEndLoc();
Parser.Lex();
- End = Tok.getLoc();
if (getLexer().is(AsmToken::LBrac)) {
ExpectRBrac = true;
Parser.Lex();
@@ -772,15 +783,15 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
return 0;
}
} else if (ExpectRBrac) {
- if (getParser().ParseExpression(Disp, End))
- return 0;
+ if (getParser().ParseExpression(Disp, End))
+ return 0;
}
if (ExpectRBrac) {
if (getLexer().isNot(AsmToken::RBrac))
return ErrorOperand(End, "expected ']' token!");
+ End = Parser.getTok().getEndLoc();
Parser.Lex();
- End = Tok.getLoc();
}
// Parse the dot operator (e.g., [ebx].foo.bar).
@@ -790,12 +801,11 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (ParseIntelDotOperator(Disp, &NewDisp, Err))
return ErrorOperand(Tok.getLoc(), Err);
+ End = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
}
- End = Tok.getLoc();
-
// handle [-42]
if (!BaseReg && !IndexReg)
return X86Operand::CreateMem(Disp, Start, End, Size);
@@ -831,28 +841,43 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
}
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
- if (getParser().ParseExpression(Disp, End)) return 0;
- End = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(Disp, End))
+ return 0;
bool NeedSizeDir = false;
- if (!Size && isParsingInlineAsm()) {
+ bool IsVarDecl = false;
+ if (isParsingInlineAsm()) {
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other then an
// identifier.
// FIXME: Pass a valid SMLoc.
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
+ unsigned tSize;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize,
+ IsVarDecl);
+ if (!Size)
+ Size = tSize;
NeedSizeDir = Size > 0;
}
}
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else
+ else {
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
// When parsing inline assembly we set the base register to a non-zero value
// as we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
/*Scale*/1, Start, End, Size, NeedSizeDir);
+ }
}
/// Parse the '.' operator.
@@ -921,8 +946,6 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
if (getParser().ParseExpression(Val, End))
return ErrorOperand(Start, "Unable to parse expression!");
- End = Parser.getTok().getLoc();
-
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -930,7 +953,8 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+ return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
+ OffsetOfLoc);
}
/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
@@ -947,15 +971,15 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
if (getParser().ParseExpression(Val, End))
return 0;
- End = Parser.getTok().getLoc();
-
unsigned Size = 0;
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other then an
// identifier.
// FIXME: Pass a valid SMLoc.
- if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
+ bool IsVarDecl;
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size,
+ IsVarDecl))
return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
Size /= 8; // Size is in terms of bits, but we want bytes in the context.
@@ -995,7 +1019,6 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
if (!getParser().ParseExpression(Val, End)) {
- End = Parser.getTok().getLoc();
return X86Operand::CreateImm(Val, Start, End);
}
}
@@ -1006,7 +1029,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
+ return X86Operand::CreateReg(RegNo, Start, End);
getParser().Lex(); // Eat the colon.
return ParseIntelMemOperand(RegNo, Start);
@@ -1183,7 +1206,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
return 0;
}
- SMLoc MemEnd = Parser.getTok().getLoc();
+ SMLoc MemEnd = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ')'.
// If we have both a base register and an index register make sure they are
@@ -2021,7 +2044,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getParser().ParseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -2059,14 +2082,10 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
return false;
}
-
-extern "C" void LLVMInitializeX86AsmLexer();
-
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
- LLVMInitializeX86AsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 1b2ffb01ad..af1381ebdf 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -10,7 +10,6 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(X86CommonTableGen)
set(sources
@@ -27,11 +26,13 @@ set(sources
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
X86NaClRewritePass.cpp
+ X86PadShortFunction.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
+ X86TargetTransformInfo.cpp
X86VZeroUpper.cpp
)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index ed61c01130..ca6f80ce3e 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -16,7 +16,6 @@
#include "X86Disassembler.h"
#include "X86DisassemblerDecoder.h"
-#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
@@ -32,7 +31,6 @@
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "X86GenInstrInfo.inc"
-#include "X86GenEDInfo.inc"
using namespace llvm;
using namespace llvm::X86Disassembler;
@@ -83,10 +81,6 @@ X86GenericDisassembler::~X86GenericDisassembler() {
delete MII;
}
-const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
- return instInfoX86;
-}
-
/// regionReader - a callback function that wraps the readByte method from
/// MemoryObject.
///
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 981701f527..b92427a7e9 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -95,8 +95,6 @@ class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
@@ -122,8 +120,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 8346b3392d..1926d4de29 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -114,7 +114,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -256,7 +256,7 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// Relax if the value is too big for a (signed) i8.
return int64_t(Value) != int64_t(int8_t(Value));
@@ -280,9 +280,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.setOpcode(RelaxedOp);
}
-/// writeNopData - Write optimal nops to the output file for the \p Count
-/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \p Count is more than the maximum optimal nops.
+/// \brief Write a sequence of optimal nops to the output, covering \p Count
+/// bytes.
+/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
index 29d87ba2c6..9acaf68c82 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
@@ -46,8 +46,7 @@ unsigned DemoteRegTo32_(unsigned RegIn);
static void EmitDirectCall(const MCOperand &Op, bool Is64Bit,
MCStreamer &Out) {
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
MCInst CALLInst;
CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
@@ -63,10 +62,7 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
const unsigned Reg32 = Op.getReg();
const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
- if (IsCall)
- Out.EmitBundleAlignEnd();
-
- Out.EmitBundleLock();
+ Out.EmitBundleLock(IsCall);
MCInst ANDInst;
ANDInst.setOpcode(X86::AND32ri8);
@@ -160,7 +156,7 @@ static void EmitRegFix(unsigned Reg64, MCStreamer &Out) {
static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp,
MCStreamer &Out) {
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
MCInst Tmp;
Tmp.setOpcode(Opc);
@@ -174,7 +170,7 @@ static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp,
}
static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) {
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
MCInst Tmp;
Tmp.setOpcode(X86::LEA64_32r);
@@ -286,8 +282,7 @@ static bool SandboxMemoryRef(MCInst *Inst,
}
static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) {
- Out.EmitBundleAlignEnd();
- Out.EmitBundleLock();
+ Out.EmitBundleLock(true);
MCInst LeaInst;
LeaInst.setOpcode(X86::LEA32r);
@@ -315,7 +310,7 @@ static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) {
static void EmitREST(const MCInst &Inst, unsigned Reg32,
bool IsMem, MCStreamer &Out) {
unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
if (!IsMem) {
EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out);
} else {
@@ -479,7 +474,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
PrefixSaved = 0;
if (PrefixLocal || !UseZeroBasedSandbox)
- Out.EmitBundleLock();
+ Out.EmitBundleLock(false);
HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out);
ShortenMemoryRef(&SandboxedInst, IndexOpPosition);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f66b203f0d..c67bb944b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -257,7 +257,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitX86MCRegisterInfo(X, RA,
X86_MC::getDwarfRegFlavour(TT, false),
- X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::getDwarfRegFlavour(TT, true),
+ RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
@@ -365,7 +366,14 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ // @LOCALMOD-BEGIN
+ MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter,
+ RelaxAll, NoExecStack);
+ if (TheTriple.isOSNaCl())
+ Streamer->EmitBundleAlignMode(5);
+
+ return Streamer;
+ // @LOCALMOD-END
}
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 949661eb99..e518fecf04 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -16,8 +16,7 @@ BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \
X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenDisassemblerTables.inc X86GenFastISel.inc \
- X86GenCallingConv.inc X86GenSubtargetInfo.inc \
- X86GenEDInfo.inc
+ X86GenCallingConv.inc X86GenSubtargetInfo.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 6a8a4fdf25..b4285a0718 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1568,43 +1568,6 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
-
-Testcase:
-int a(int x) { return (x & 127) > 31; }
-
-Current output:
- movl 4(%esp), %eax
- andl $127, %eax
- cmpl $31, %eax
- seta %al
- movzbl %al, %eax
- ret
-
-Ideal output:
- xorl %eax, %eax
- testl $96, 4(%esp)
- setne %al
- ret
-
-This should definitely be done in instcombine, canonicalizing the range
-condition into a != condition. We get this IR:
-
-define i32 @a(i32 %x) nounwind readnone {
-entry:
- %0 = and i32 %x, 127 ; <i32> [#uses=1]
- %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1]
- %2 = zext i1 %1 to i32 ; <i32> [#uses=1]
- ret i32 %2
-}
-
-Instcombine prefers to strength reduce relational comparisons to equality
-comparisons when possible, this should be another case of that. This could
-be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
-looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already
-be redesigned to use ComputeMaskedBits and friends.
-
-
-//===---------------------------------------------------------------------===//
Testcase:
int x(int a) { return (a&0xf0)>>4; }
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 52a67f763b..815d23588f 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index b8b0a5a05b..8813aa836a 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -68,6 +68,13 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
///
FunctionPass *createEmitX86CodeToMemory();
+/// \brief Creates an X86-specific Target Transformation Info pass.
+ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
+
+/// createX86PadShortFunctions - Return a pass that pads short functions
+/// with NOOPs. This will prevent a stall when returning on the Atom.
+FunctionPass *createX86PadShortFunctions();
+
} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index e3c22d9c3b..3ab2899365 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -123,8 +123,11 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
- "HasSlowDivide", "true",
- "Use small divide for positive values less than 256">;
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -155,7 +158,8 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
@@ -166,7 +170,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide]>;
+ FeatureSlowDivide, FeaturePadShortFunctions]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
FeatureSlowBTMem, FeatureFastUAMem,
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 9cbc4ea649..24f94725e0 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,26 +20,26 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -572,7 +572,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MCSA_IndirectSymbol);
// hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
- OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
@@ -598,7 +598,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long 0
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -607,8 +607,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// using NLPs. However, sometimes the types are local to the file. So
// we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -625,8 +624,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -692,7 +690,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
name += ",DATA";
else
name += ",data";
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
@@ -701,7 +699,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
else
name = " -export:";
name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
}
}
@@ -721,7 +719,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize());
}
Stubs.clear();
}
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 8dbff74e51..0a0a4c3f4d 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3201eb9cc0..670de2fa2e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -19,19 +19,19 @@
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -849,8 +849,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -1661,9 +1660,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
CallingConv::ID CC = CS.getCallingConv();
if (CC == CallingConv::Fast || CC == CallingConv::GHC)
return 0;
- if (!CS.paramHasAttr(1, Attributes::StructRet))
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
return 0;
- if (CS.paramHasAttr(1, Attributes::InReg))
+ if (CS.paramHasAttr(1, Attribute::InReg))
return 0;
return 4;
}
@@ -1701,8 +1700,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
*FuncInfo.MF, FTy->isVarArg(),
Outs, FTy->getContext());
@@ -1742,12 +1740,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) {
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
PointerType *Ty = cast<PointerType>(ArgVal->getType());
Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
@@ -1761,9 +1759,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
return false;
}
- if (CS.paramHasAttr(AttrInd, Attributes::InReg))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
Flags.setInReg();
- if (CS.paramHasAttr(AttrInd, Attributes::Nest))
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
Flags.setNest();
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
@@ -2047,17 +2045,17 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
ComputeValueVTs(TLI, I->getType(), RetTys);
for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
EVT VT = RetTys[i];
- EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
for (unsigned j = 0; j != NumRegs; ++j) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = !CS.getInstruction()->use_empty();
- if (CS.paramHasAttr(0, Attributes::SExt))
+ if (CS.paramHasAttr(0, Attribute::SExt))
MyFlags.Flags.setSExt();
- if (CS.paramHasAttr(0, Attributes::ZExt))
+ if (CS.paramHasAttr(0, Attribute::ZExt))
MyFlags.Flags.setZExt();
- if (CS.paramHasAttr(0, Attributes::InReg))
+ if (CS.paramHasAttr(0, Attribute::InReg))
MyFlags.Flags.setInReg();
Ins.push_back(MyFlags);
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 16197e09e9..0585b43a46 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -36,7 +36,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 48dc67da38..3ae70c6780 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -23,8 +23,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
@@ -627,6 +627,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
return CompactUnwindEncoding;
}
+/// usesTheStack - This function checks if any of the users of EFLAGS
+/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
+ re = MRI.reg_end(); ri != re; ++ri)
+ if (ri->isCopy())
+ return true;
+
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -675,12 +691,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) &&
+ // stack pointer (we fit in the Red Zone). We also check that we don't
+ // push and pop from the stack.
+ if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
+ !usesTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
@@ -1161,7 +1180,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineInstr *NewMI = prior(MBBI);
- NewMI->copyImplicitOps(MBBI);
+ NewMI->copyImplicitOps(MF, MBBI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index a35b9e0e1f..70cfa7d516 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -25,15 +25,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
@@ -435,6 +435,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (!Chain.getNumOperands())
return false;
+ // Since we are not checking for AA here, conservatively abort if the chain
+ // writes to memory. It's not safe to move the callee (a load) across a store.
+ if (isa<MemSDNode>(Chain.getNode()) &&
+ cast<MemSDNode>(Chain.getNode())->writeMem())
+ return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@@ -446,8 +451,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ OptForSize = MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
@@ -457,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
!Subtarget->isTargetNaCl() && // @LOCALMOD: We can't fold load/call
(N->getOpcode() == X86ISD::CALL ||
(N->getOpcode() == X86ISD::TC_RETURN &&
- // Only does this if load can be foled into TC_RETURN.
+ // Only does this if load can be folded into TC_RETURN.
(Subtarget->is64Bit() ||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
@@ -1064,8 +1069,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal;
return false;
}
- break;
}
+ break;
case ISD::SRL: {
// Scale must not be used already.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e05b43add8..61169626f1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VariadicFunction.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -31,14 +30,15 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -882,6 +882,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
@@ -1058,6 +1059,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@@ -1099,6 +1102,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
@@ -1121,6 +1125,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1135,6 +1141,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
@@ -1169,6 +1182,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v8i32, Legal);
setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1250,7 +1265,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
@@ -1333,13 +1347,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setPrefFunctionAlignment(4); // 2^4 bytes.
}
-
EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return MVT::i8;
return VT.changeVectorElementTypeToInteger();
}
-
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
@@ -1389,22 +1401,22 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
- if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
@@ -1432,6 +1444,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
}
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
+ if (VT == MVT::f32)
+ return X86ScalarSSEf32;
+ else if (VT == MVT::f64)
+ return X86ScalarSSEf64;
+ return true;
+}
+
bool
X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
if (Fast)
@@ -1492,10 +1512,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(EVT VT) const{
+X86TargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
@@ -1537,7 +1557,6 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
-
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1718,8 +1737,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-EVT
-X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+MVT
+X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
@@ -1728,7 +1747,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
else
ReturnMVT = MVT::i32;
- EVT MinVT = getRegisterType(Context, ReturnMVT);
+ MVT MinVT = getRegisterType(ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -1750,7 +1769,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
@@ -1794,7 +1813,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
-
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
@@ -2008,10 +2026,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
- if (RegVT.isVector()) {
- ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
- ArgValue);
- } else
+ if (RegVT.isVector())
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
+ else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
@@ -2089,8 +2106,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2568,8 +2585,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->getFnAttributes().
- hasAttribute(Attributes::NonLazyBind)) {
+ cast<Function>(GV)->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2687,7 +2705,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ins, dl, DAG, InVals);
}
-
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
@@ -2996,7 +3013,6 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return X86::createFastISel(funcInfo, libInfo);
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
@@ -3108,7 +3124,6 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
}
-
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
@@ -6759,8 +6774,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool HasFp256 = Subtarget->hasFp256();
bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -7021,7 +7036,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
getShuffleCLImmediate(SVOp), DAG);
-
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -7123,7 +7137,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
return SDValue();
}
-
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
@@ -7488,7 +7501,6 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
@@ -7932,7 +7944,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for this target.");
}
-
/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
@@ -8381,12 +8392,71 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
}
}
-SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = Op->getValueType(0);
+ SDValue In = Op->getOperand(0);
+ EVT InVT = In.getValueType();
+ DebugLoc dl = Op->getDebugLoc();
+
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+
+ if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
+
+ SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ SDValue Undef = DAG.getUNDEF(InVT);
+ bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+ SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+ SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
+
+SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
+ return SDValue();
+}
+SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue In = Op.getOperand(0);
EVT SVT = In.getValueType();
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
if (!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements())
return SDValue();
@@ -8400,7 +8470,9 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const
SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+ DAG.getVectorShuffle(MVT::v8i16, DL, In,
+ DAG.getUNDEF(MVT::v8i16),
+ &Mask[0]));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
@@ -8408,19 +8480,109 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const
SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
- EVT SVT = Op.getOperand(0).getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
- if (!VT.is128BitVector() || !SVT.is256BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
+ if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ // On AVX2, v4i64 -> v4i32 becomes VPERMD.
+ if (Subtarget->hasInt256()) {
+ static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
+ ShufMask);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The PSHUFD mask:
+ static const int ShufMask1[] = {0, 2, 0, 0};
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
+
+ // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
+ }
+
+ if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
+ if (Subtarget->hasInt256()) {
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i < 2; ++i) {
+ pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ for (unsigned j = 0; j < 8; ++j)
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
+ &pshufbMask[0], 32);
+ In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+
+ static const int ShufMask[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
+ &ShufMask[0]);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ }
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+
+ // The PSHUFB mask:
+ static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The MOVLHPS Mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ }
+
+ // Handle truncation of V256 to V128 using shuffles.
+ if (!VT.is128BitVector() || !SVT.is256BitVector())
return SDValue();
- assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
+ assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
+ "Invalid op");
+ assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumElems * 2);
- SDValue In = Op.getOperand(0);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
@@ -9179,7 +9341,6 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
@@ -9287,8 +9448,28 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
- return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+ // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
+ // pcmpeqd + pshufd + pand.
+ assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+ // First cast everything to the right type,
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Do the compare.
+ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+ // Make sure the lower and upper halves are both all-ones.
+ const int Mask[] = { 1, 0, 3, 2 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
+ Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -9544,6 +9725,53 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op->getValueType(0);
+ SDValue In = Op->getOperand(0);
+ EVT InVT = In.getValueType();
+ DebugLoc dl = Op->getDebugLoc();
+
+ if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
+ (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
+
+ // Optimize vectors in AVX mode
+ // Sign extend v8i16 to v8i32 and
+ // v4i32 to v4i64
+ //
+ // Divide input vector into two parts
+ // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
+ // concat the vectors to original VT
+
+ unsigned NumElems = InVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(InVT);
+
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
+
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -9832,7 +10060,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
Chain, Dest, CC, Cond);
}
-
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca is needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
@@ -9998,8 +10225,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->getFnAttributes()
- .hasAttribute(Attributes::NoImplicitFloat)) &&
+ .getFunction()->getAttributes()
+ .hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -10275,6 +10503,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ // SSE2/AVX2 sub with unsigned saturation intrinsics
+ case Intrinsic::x86_sse2_psubus_b:
+ case Intrinsic::x86_sse2_psubus_w:
+ case Intrinsic::x86_avx2_psubus_b:
+ case Intrinsic::x86_avx2_psubus_w:
+ return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
// SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
@@ -10324,6 +10560,100 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), Op.getOperand(2));
}
+ // SSE2/SSE41/AVX2 integer max/min intrinsics.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ Opcode = X86ISD::UMAX;
+ break;
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ Opcode = X86ISD::UMIN;
+ break;
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ Opcode = X86ISD::SMAX;
+ break;
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d:
+ Opcode = X86ISD::SMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/SSE2/AVX floating point max/min intrinsics.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ Opcode = X86ISD::FMAX;
+ break;
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256:
+ Opcode = X86ISD::FMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
@@ -10391,6 +10721,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
+ case Intrinsic::x86_sse_sqrt_ps:
+ case Intrinsic::x86_sse2_sqrt_pd:
+ case Intrinsic::x86_avx_sqrt_ps_256:
+ case Intrinsic::x86_avx_sqrt_pd_256:
+ return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -10914,7 +11250,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
+ if (Attrs.hasAttribute(Idx, Attribute::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
@@ -11004,7 +11340,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, 2, 2);
@@ -11037,7 +11372,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
-
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
@@ -11166,17 +11500,43 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
+ if (VT == MVT::v4i32) {
+ assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+ "Should not custom lower when pmuldq is available!");
+
+ // Extract the odd parts.
+ const int UnpackMask[] = { 1, -1, 3, -1 };
+ SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
+ SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
+
+ // Multiply the even parts.
+ SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
+ // Now multiply odd parts.
+ SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
+
+ Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
+ Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+
+ // Merge the two vectors back together with a shuffle. This expands into 2
+ // shuffles.
+ const int ShufMask[] = { 0, 4, 2, 6 };
+ return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
+ }
+
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
"Only know how to lower V2I64/V4I64 multiply");
- DebugLoc dl = Op.getDebugLoc();
-
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
@@ -11188,9 +11548,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
-
SDValue ShAmt = DAG.getConstant(32, MVT::i32);
SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
@@ -11214,6 +11571,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -11567,7 +11967,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
}
-
static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -11652,7 +12051,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
-
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT T = Op.getValueType();
@@ -11821,7 +12219,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
@@ -11870,6 +12270,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
// @LOCALMOD-BEGIN
case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG);
case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG);
@@ -11928,6 +12329,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
@@ -11977,6 +12379,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
+ if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
@@ -12144,10 +12548,15 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
+ case X86ISD::UMAX: return "X86ISD::UMAX";
+ case X86ISD::UMIN: return "X86ISD::UMIN";
+ case X86ISD::SMAX: return "X86ISD::SMAX";
+ case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
@@ -12202,7 +12611,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::ANDN: return "X86ISD::ANDN";
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
@@ -12305,24 +12713,21 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
-
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
@@ -12330,9 +12735,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -14544,127 +14947,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-
/// PerformTruncateCombine - Converts truncate operation to
/// a sequence of vector shuffle operations.
/// It is possible when we truncate 256-bit vector to 128-bit vector
static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasFp256())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
-
- if (Subtarget->hasInt256()) {
- // AVX2: v4i64 -> v4i32
-
- // VPERMD
- static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op);
- Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32),
- ShufMask);
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
- DAG.getIntPtrConstant(0));
- }
-
- // AVX: v4i64 -> v4i32
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(2));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // PSHUFD
- static const int ShufMask1[] = {0, 2, 0, 0};
-
- SDValue Undef = DAG.getUNDEF(VT);
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
- }
-
- if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
-
- if (Subtarget->hasInt256()) {
- // AVX2: v8i32 -> v8i16
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
-
- // PSHUFB
- SmallVector<SDValue,32> pshufbMask;
- for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
- for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- }
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8,
- &pshufbMask[0], 32);
- Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV);
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op);
-
- static const int ShufMask[] = {0, 2, -1, -1};
- Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64),
- &ShufMask[0]);
-
- Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
- }
-
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(4));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
-
- // PSHUFB
- static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1};
-
- SDValue Undef = DAG.getUNDEF(MVT::v16i8);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
- }
-
return SDValue();
}
@@ -14859,6 +15147,76 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
+static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (!VT.isVector())
+ return 0;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ if (!Subtarget->hasAVX2())
+ return 0;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ if (!Subtarget->hasSSE2())
+ return 0;
+ }
+
+ // SSE2 has only a small subset of the operations.
+ bool hasUnsigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v16i8);
+ bool hasSigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v8i16);
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ }
+ }
+
+ return 0;
+}
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -15139,6 +15497,71 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Match VSELECTs into subs with unsigned saturation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
+ ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on the
+ // left side invert the predicate to simplify logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other->getNumOperands() == 2 &&
+ DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> subus x, y
+ // x > y ? x-y : 0 --> subus x, y
+ if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
+ Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+
+ // If the RHS is a constant we have to reverse the const canonicalization.
+ // x > C-1 ? x+-C : 0 --> subus x, C
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (CondRHS.getConstantOperandVal(0) == -A-1) {
+ SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
+ DAG.getConstant(-A, VT.getScalarType()));
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ V.data(), V.size()));
+ }
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (A.isSignBit())
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Try to match a min/max vector operation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
+ if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
+ return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -15436,7 +15859,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
@@ -15525,7 +15947,6 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
}
}
-
// Hardware support for vector shifts is sparse which makes us scalarize the
// vector operations in many cases. Also, on sandybridge ADD is faster than
// shl.
@@ -15669,7 +16090,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
}
-
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
// and friends. Likewise for OR -> CMPNEQSS.
@@ -15778,9 +16198,92 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
return false;
}
+// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
+// register. In most cases we actually compare or select YMM-sized registers
+// and mixing the two types creates horrible code. This method optimizes
+// some of the transition sequences.
+static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT.getSizeInBits() != 256)
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+
+ SDValue Narrow = N->getOperand(0);
+ EVT NarrowVT = Narrow->getValueType(0);
+ if (NarrowVT.getSizeInBits() != 128)
+ return SDValue();
+
+ if (Narrow->getOpcode() != ISD::XOR &&
+ Narrow->getOpcode() != ISD::AND &&
+ Narrow->getOpcode() != ISD::OR)
+ return SDValue();
+
+ SDValue N0 = Narrow->getOperand(0);
+ SDValue N1 = Narrow->getOperand(1);
+ DebugLoc DL = Narrow->getDebugLoc();
+
+ // The Left side has to be a trunc.
+ if (N0.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ // The type of the truncated inputs.
+ EVT WideVT = N0->getOperand(0)->getValueType(0);
+ if (WideVT != VT)
+ return SDValue();
+
+ // The right side has to be a 'trunc' or a constant vector.
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+ bool RHSConst = (isSplatVector(N1.getNode()) &&
+ isa<ConstantSDNode>(N1->getOperand(0)));
+ if (!RHSTrunc && !RHSConst)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+ return SDValue();
+
+ // Set N0 and N1 to hold the inputs to the new wide operation.
+ N0 = N0->getOperand(0);
+ if (RHSConst) {
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1->getOperand(0));
+ SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+ } else if (RHSTrunc) {
+ N1 = N1->getOperand(0);
+ }
+
+ // Generate the wide operation.
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ return Op;
+ case ISD::ZERO_EXTEND: {
+ unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(InBits);
+ Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ Op, DAG.getConstant(Mask, VT));
+ }
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+ Op, DAG.getValueType(NarrowVT));
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15788,9 +16291,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
- // Create ANDN, BLSI, and BLSR instructions
+ // Create BLSI, and BLSR instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
@@ -15798,13 +16299,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
- // Check LHS for not
- if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
- // Check RHS for not
- if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
-
// Check LHS for neg
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
isZero(N0.getOperand(0)))
@@ -15857,6 +16351,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15864,8 +16359,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16051,6 +16544,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -16064,8 +16558,6 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasBMI())
return SDValue();
- EVT VT = N->getValueType(0);
-
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
@@ -16100,11 +16592,14 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSSE3 shuffles.
+ // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
+ // expansion is still better than scalar code.
+ // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
+ // emit a shuffle and a arithmetic shift.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
+ if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
+ (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -16113,6 +16608,9 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+ return SDValue();
+
// All sizes must be a power of two.
if (!isPowerOf2_32(RegSz * MemSz * NumElems))
return SDValue();
@@ -16136,16 +16634,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+ if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+ return SDValue();
+
+ unsigned loadRegZize = RegSz;
+ if (Ext == ISD::SEXTLOAD && RegSz == 256)
+ loadRegZize /= 2;
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
- RegSz/SclrLoadTy.getSizeInBits());
+ loadRegZize/SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- RegSz/MemVT.getScalarType().getSizeInBits());
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -16186,6 +16691,41 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1 we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ return DCI.CombineTo(N, Sext, TF, true);
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
+ return SDValue();
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ SmallVector<SDValue, 8> C(NumElems,
+ DAG.getConstant(Amt, RegVT.getScalarType()));
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+
+ return DCI.CombineTo(N, Shuff, TF, true);
+ }
+
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
@@ -16319,7 +16859,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Chains.size());
}
-
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
@@ -16330,8 +16869,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
@@ -16625,7 +17164,6 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
-
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -16681,48 +17219,12 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
- (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
-
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
-
- // Optimize vectors in AVX mode
- // Sign extend v8i16 to v8i32 and
- // v4i32 to v4i64
- //
- // Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
- // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
- // concat the vectors to original VT
-
- unsigned NumElems = OpVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(OpVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
-
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
-
- SmallVector<int,8> ShufMask2(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
-
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
-
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
+
return SDValue();
}
@@ -16776,58 +17278,26 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- EVT OpVT = N0.getValueType();
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() != X86ISD::SETCC_CARRY)
- return SDValue();
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C || C->getZExtValue() != 1)
- return SDValue();
- return DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
- N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, VT));
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
}
- // Optimize vectors in AVX mode:
- //
- // v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
- // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
- // Concat upper and lower parts.
- //
- // v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
- // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
- // Concat upper and lower parts.
- //
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasFp256())
- return SDValue();
-
- if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
- ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
-
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
-
- SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
- SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec);
- SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec);
-
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
return SDValue();
@@ -17363,8 +17833,6 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
-
-
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
@@ -17441,13 +17909,13 @@ TargetLowering::ConstraintWeight
case 'f':
case 't':
case 'u':
- if (type->isFloatingPointTy())
- weight = CW_SpecificReg;
- break;
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
case 'y':
- if (type->isX86_MMXTy() && Subtarget->hasMMX())
- weight = CW_SpecificReg;
- break;
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
case 'x':
case 'Y':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
@@ -17887,217 +18355,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return Res;
}
-
-//===----------------------------------------------------------------------===//
-//
-// X86 cost model.
-//
-//===----------------------------------------------------------------------===//
-
-struct X86CostTblEntry {
- int ISD;
- MVT Type;
- unsigned Cost;
-};
-
-static int
-FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-struct X86TypeConversionCostTblEntry {
- int ISD;
- MVT Dst;
- MVT Src;
- unsigned Cost;
-};
-
-static int
-FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
- int ISD, MVT Dst, MVT Src) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-ScalarTargetTransformInfo::PopcntHwSupport
-X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- // TODO: Currently the __builtin_popcount() implementation using SSE3
- // instructions is inefficient. Once the problem is fixed, we should
- // call ST.hasSSE3() instead of ST.hasSSE4().
- return ST.hasSSE41() ? Fast : None;
-}
-
-unsigned
-X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry AVX1CostTable[] = {
- // We don't have to scalarize unsupported ops. We can issue two half-sized
- // operations and we only need to extract the upper YMM half.
- // Two ops + 1 extract + 1 insert = 4.
- { ISD::MUL, MVT::v8i32, 4 },
- { ISD::SUB, MVT::v8i32, 4 },
- { ISD::ADD, MVT::v8i32, 4 },
- { ISD::MUL, MVT::v4i64, 4 },
- { ISD::SUB, MVT::v4i64, 4 },
- { ISD::ADD, MVT::v4i64, 4 },
- };
-
- // Look for AVX1 lowering tricks.
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
- LT.second);
- if (Idx != -1)
- return LT.first * AVX1CostTable[Idx].Cost;
- }
- // Fallback to the default implementation.
- return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
-}
-
-unsigned
-X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- assert(Val->isVectorTy() && "This must be a vector type");
-
- if (Index != -1U) {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
-
- // This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
-
- // The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
-
- // Floating point scalars are already located in index #0.
- if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
- return 0;
- }
-
- return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
-}
-
-unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- MVT MTy = LT.second;
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST =
- TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry SSE42CostTbl[] = {
- { ISD::SETCC, MVT::v2f64, 1 },
- { ISD::SETCC, MVT::v4f32, 1 },
- { ISD::SETCC, MVT::v2i64, 1 },
- { ISD::SETCC, MVT::v4i32, 1 },
- { ISD::SETCC, MVT::v8i16, 1 },
- { ISD::SETCC, MVT::v16i8, 1 },
- };
-
- static const X86CostTblEntry AVX1CostTbl[] = {
- { ISD::SETCC, MVT::v4f64, 1 },
- { ISD::SETCC, MVT::v8f32, 1 },
- // AVX1 does not support 8-wide integer compare.
- { ISD::SETCC, MVT::v4i64, 4 },
- { ISD::SETCC, MVT::v8i32, 4 },
- { ISD::SETCC, MVT::v16i16, 4 },
- { ISD::SETCC, MVT::v32i8, 4 },
- };
-
- static const X86CostTblEntry AVX2CostTbl[] = {
- { ISD::SETCC, MVT::v4i64, 1 },
- { ISD::SETCC, MVT::v8i32, 1 },
- { ISD::SETCC, MVT::v16i16, 1 },
- { ISD::SETCC, MVT::v32i8, 1 },
- };
-
- if (ST.hasSSE42()) {
- int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX2()) {
- int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
- Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
-
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
- };
-
- if (ST.hasAVX()) {
- int Idx = FindInConvertTable(AVXConversionTbl,
- array_lengthof(AVXConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVXConversionTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index cad471df4a..5cd9623dfb 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
namespace X86ISD {
@@ -182,6 +181,9 @@ namespace llvm {
/// BLENDI - Blend where the selector is an immediate.
BLENDI,
+ // SUBUS - Integer sub with unsigned saturation.
+ SUBUS,
+
/// HADD - Integer horizontal add.
HADD,
@@ -194,6 +196,12 @@ namespace llvm {
/// FHSUB - Floating point horizontal sub.
FHSUB,
+ /// UMAX, UMIN - Unsigned integer max and min.
+ UMAX, UMIN,
+
+ /// SMAX, SMIN - Signed integer max and min.
+ SMAX, SMIN,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -280,8 +288,6 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
-
BLSI, // BLSI - Extract lowest set isolated bit
BLSMSK, // BLSMSK - Get mask up to lowest set bit
BLSR, // BLSR - Reset lowest set bit
@@ -505,18 +511,25 @@ namespace llvm {
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// specified type to expand memcpy / memset inline. This is mostly true
+ /// for all types except for some special cases. For example, on X86
+ /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+ /// also does type conversion. Note the specified type doesn't have to be
+ /// legal as the hook is used before type legalization.
+ virtual bool isSafeMemOpType(MVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type. Returns whether it
/// is "fast" by reference in the second argument.
@@ -719,7 +732,7 @@ namespace llvm {
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
@@ -813,7 +826,9 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -841,6 +856,7 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
@@ -881,9 +897,8 @@ namespace llvm {
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
- virtual EVT
- getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
- ISD::NodeType ExtendKind) const;
+ virtual MVT
+ getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -958,31 +973,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-
- class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
- public:
- explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
- ScalarTargetTransformImpl(TL) {};
-
- virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
- };
-
- class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
- public:
- explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
- VectorTargetTransformImpl(TL) {}
-
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
-
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
-
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
- };
}
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f580b76d95..4955e76c87 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -58,7 +58,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
+ "mul{w}\t$src",
[], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
@@ -159,7 +159,7 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(X86smul_flag GR16:$src1, (load addr:$src2)))],
IIC_IMUL16_RM>,
TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
@@ -183,8 +183,8 @@ let Defs = [EFLAGS] in {
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))],
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))],
IIC_IMUL16_RRI>, OpSize;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
@@ -267,6 +267,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
+let hasSideEffects = 1 in { // so that we don't speculatively execute
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -320,12 +321,13 @@ let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
"idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
"idiv{l}\t$src", [], IIC_IDIV32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
}
+} // hasSideEffects = 0
//===----------------------------------------------------------------------===//
// Two address Instructions.
@@ -413,11 +415,11 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -431,22 +433,22 @@ def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
// In 64-bit mode, single byte INC and DEC cannot be encoded.
let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -470,7 +472,7 @@ let CodeSize = 2 in {
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-
+
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
@@ -499,12 +501,12 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
[(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -545,57 +547,57 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
/// VT - This is the value type itself.
ValueType VT = vt;
-
+
/// InstrSuffix - This is the suffix used on instructions with this type. For
/// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
string InstrSuffix = instrsuffix;
-
+
/// RegClass - This is the register class associated with this type. For
/// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
RegisterClass RegClass = regclass;
-
+
/// LoadNode - This is the load node associated with this type. For
/// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
PatFrag LoadNode = loadnode;
-
+
/// MemOperand - This is the memory operand associated with this type. For
/// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
X86MemOperand MemOperand = memoperand;
-
+
/// ImmEncoding - This is the encoding of an immediate of this type. For
/// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
/// since the immediate fields of i64 instructions is a 32-bit sign extended
/// value.
ImmType ImmEncoding = immkind;
-
+
/// ImmOperand - This is the operand kind of an immediate of this type. For
/// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
/// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
/// extended value.
Operand ImmOperand = immoperand;
-
+
/// ImmOperator - This is the operator that should be used to match an
/// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
SDPatternOperator ImmOperator = immoperator;
-
+
/// Imm8Operand - This is the operand kind to use for an imm8 of this type.
/// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
/// only used for instructions that have a sign-extended imm8 field form.
Operand Imm8Operand = imm8operand;
-
+
/// Imm8Operator - This is the operator that should be used to match an 8-bit
/// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
SDPatternOperator Imm8Operator = imm8operator;
-
+
/// HasOddOpcode - This bit is true if the instruction should have an odd (as
/// opposed to even) opcode. Operations on i8 are usually even, operations on
/// other datatypes are odd.
bit HasOddOpcode = hasOddOpcode;
-
+
/// HasOpSizePrefix - This bit is set to true if the instruction should have
/// the 0x66 operand size prefix. This is set for i16 types.
bit HasOpSizePrefix = hasOpSizePrefix;
-
+
/// HasREX_WPrefix - This bit is set to true if the instruction should have
/// the 0x40 REX prefix. This is set for i64 types.
bit HasREX_WPrefix = hasREX_WPrefix;
@@ -625,12 +627,12 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
string mnemonic, string args, list<dag> pattern,
InstrItinClass itin = IIC_BIN_NONMEM>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
+ f, outs, ins,
!strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
itin> {
@@ -691,6 +693,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
@@ -700,6 +703,7 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRM - Instructions like "add reg, reg, [mem]".
@@ -765,13 +769,13 @@ class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
EFLAGS))]>;
@@ -790,7 +794,7 @@ class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
+
// BinOpRI8_F - Instructions like "cmp reg, imm8".
class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -853,14 +857,14 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI_RMW - Instructions like "add [mem], imm".
class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
(implicit EFLAGS)]>;
@@ -868,7 +872,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src))],
opcode>;
@@ -914,6 +918,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
+ let hasSideEffects = 0;
}
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
@@ -929,61 +934,61 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
- def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
@@ -1000,61 +1005,61 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
@@ -1068,60 +1073,60 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
- def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
@@ -1181,7 +1186,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
-
+
def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
"{$src, %al|AL, $src}">;
def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
@@ -1205,12 +1210,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
IIC_BIN_NONMEM>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1218,6 +1223,17 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
}
+let Predicates = [HasBMI] in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index adeaf5410d..8f2d0a1aae 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -17,19 +17,19 @@
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1 in {
- def #NAME#16rr
+ def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
IIC_CMOV16_RR>,TB,OpSize;
- def #NAME#32rr
+ def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
IIC_CMOV32_RR>, TB;
- def #NAME#64rr
+ def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
@@ -38,18 +38,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
- def #NAME#16rm
+ def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV16_RM>,
TB, OpSize;
- def #NAME#32rm
+ def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
- def #NAME#64rm
+ def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index a24ddf6f99..a2d34d6086 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -523,18 +523,18 @@ def CMOV_RFP80 : I<0, Pseudo,
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- def #NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- def #NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- def #NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- def #NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
+ def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
+ (ins i8mem:$ptr, GR8:$val),
+ !strconcat(mnemonic, "8 PSEUDO!"), []>;
+ def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
+ (ins i16mem:$ptr, GR16:$val),
+ !strconcat(mnemonic, "16 PSEUDO!"), []>;
+ def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+ (ins i32mem:$ptr, GR32:$val),
+ !strconcat(mnemonic, "32 PSEUDO!"), []>;
+ def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+ (ins i64mem:$ptr, GR64:$val),
+ !strconcat(mnemonic, "64 PSEUDO!"), []>;
}
}
@@ -569,10 +569,10 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in
- def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
+ let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+ def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ !strconcat(mnemonic, "6432 PSEUDO!"), []>;
}
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
@@ -614,77 +614,77 @@ multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
- MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat(mnemonic, "{b}\t",
+def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
+def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, OpSize, LOCK;
-def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat(mnemonic, "{l}\t",
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, LOCK;
-
-def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
- ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat(mnemonic, "{b}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-
-def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat(mnemonic, "{l}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+
+def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat(mnemonic, "{l}\t",
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
-def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
}
@@ -701,18 +701,18 @@ multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
-def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [], IIC_UNARY_MEM>, OpSize, LOCK;
-def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
+def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
+ !strconcat(mnemonic, "{b}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+ !strconcat(mnemonic, "{w}\t$dst"),
+ [], IIC_UNARY_MEM>, OpSize, LOCK;
+def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+ !strconcat(mnemonic, "{l}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+ !strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
-def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
}
}
@@ -724,9 +724,9 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
- def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat(mnemonic, "\t$ptr"),
- [(frag addr:$ptr)], itin>, TB, LOCK;
+ def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
+ !strconcat(mnemonic, "\t$ptr"),
+ [(frag addr:$ptr)], itin>, TB, LOCK;
}
}
@@ -735,21 +735,21 @@ multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
InstrItinClass itin8, InstrItinClass itin> {
let isCodeGenOnly = 1 in {
let Defs = [AL, EFLAGS], Uses = [AL] in
- def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
- !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+ def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
- def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
- !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+ def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
- def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
- !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+ def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
- def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
- !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+ def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
@@ -774,33 +774,33 @@ multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag,
InstrItinClass itin8, InstrItinClass itin> {
let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
- def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin8>;
- def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin8>;
+ def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d360a73b34..7759a8a2da 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm<opc213,
- !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ !strconcat(OpcodeStr, "213", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
- !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ !strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
defm r231 : fma3p_rm<opc231,
- !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ !strconcat(OpcodeStr, "231", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
} // neverHasSideEffects = 1
}
@@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
let neverHasSideEffects = 1 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpVT, mem_frag>;
}
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpVT, mem_frag, OpNode>,
- fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, mem_cpat, Int, RC>;
}
@@ -220,7 +220,7 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -294,7 +294,7 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 09ab995166..7025e93fa1 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>]>;
+def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
+def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
+def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
+def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
+
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -128,6 +133,7 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 72d8b5c7fd..0e833ffb73 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -24,8 +24,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -302,7 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
{ X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
{ X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
@@ -360,7 +360,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
// AVX 128-bit versions of foldable instructions
- { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -472,9 +472,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::RSQRTSSr, X86::RSQRTSSm, 0 },
{ X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
{ X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
{ X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
{ X86::SQRTSDr, X86::SQRTSDm, 0 },
{ X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
{ X86::SQRTSSr, X86::SQRTSSm, 0 },
@@ -515,27 +513,25 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
- { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
- { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
- { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
- { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
- { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
- { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
- { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
- { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
- { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
- { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
- { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
- { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
- { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
- { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
- { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
- { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
- { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VRCPPSr, X86::VRCPPSm, 0 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
{ X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
@@ -546,28 +542,41 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
- { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
// AVX2 foldable instructions
- { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
- { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
- { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
- { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
- { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
- { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
- { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- // BMI/BMI2 foldable instructions
+ // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+ { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BLSI32rr, X86::BLSI32rm, 0 },
+ { X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+ { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+ { X86::BLSR32rr, X86::BLSR32rm, 0 },
+ { X86::BLSR64rr, X86::BLSR64rm, 0 },
+ { X86::BZHI32rr, X86::BZHI32rm, 0 },
+ { X86::BZHI64rr, X86::BZHI64rm, 0 },
+ { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+ { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+ { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+ { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+ { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+ { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
{ X86::RORX32ri, X86::RORX32mi, 0 },
{ X86::RORX64ri, X86::RORX64mi, 0 },
{ X86::SARX32rr, X86::SARX32rm, 0 },
@@ -576,6 +585,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+ { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+ { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -696,21 +708,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
- { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
- { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@@ -761,6 +765,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
+ { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
+ { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@@ -832,31 +844,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
- { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
- { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
- { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
- { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
- { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
- { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
- { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
- { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
- { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
- { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
- { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
- { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
+ { X86::VANDPDrr, X86::VANDPDrm, 0 },
+ { X86::VANDPSrr, X86::VANDPSrm, 0 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
- { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
- { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
{ X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
@@ -867,263 +879,267 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
{ X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
{ X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
- { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
- { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
- { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
- { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
{ X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
{ X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
- { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
- { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
- { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
- { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
- { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
- { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
- { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
- { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
- { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
- { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
- { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPSrr, X86::VMULPSrm, 0 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
- { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
- { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
- { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
- { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
- { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
- { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
- { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
- { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
- { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
- { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
- { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
- { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
- { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
- { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
- { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
- { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
- { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
- { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
- { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
- { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
- { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
- { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
- { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
- { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
- { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
- { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
- { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
- { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
- { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
- { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
- { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
- { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
- { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
- { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
- { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
- { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
- { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
- { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
- { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
- { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
- { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
- { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
- { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
- { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
- { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
- { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
- { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
- { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
- { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
- { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
- { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
- { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
- { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
- { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
- { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
- { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
- { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
- { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
- { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
- { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
- { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
- { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
- { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
- { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
- { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
- { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
- { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
- { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
- { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
- { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
- { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
- { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
- { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
- { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VORPDrr, X86::VORPDrm, 0 },
+ { X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
+ { X86::VPADDBrr, X86::VPADDBrm, 0 },
+ { X86::VPADDDrr, X86::VPADDDrm, 0 },
+ { X86::VPADDQrr, X86::VPADDQrm, 0 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
+ { X86::VPADDWrr, X86::VPADDWrm, 0 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
+ { X86::VPANDNrr, X86::VPANDNrm, 0 },
+ { X86::VPANDrr, X86::VPANDrm, 0 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
+ { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
+ { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
+ { X86::VPORrr, X86::VPORrm, 0 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
+ { X86::VPSRADrr, X86::VPSRADrm, 0 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
+ { X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
- { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
+ { X86::VXORPDrr, X86::VXORPDrm, 0 },
+ { X86::VXORPSrr, X86::VXORPSrm, 0 },
// AVX 256-bit foldable instructions
- { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
- { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
- { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
- { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
- { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
- { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
- { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
- { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
- { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
- { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
- { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
- { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
- { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
- { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
- { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
- { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
- { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
- { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
- { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
- { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
- { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
- { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
- { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
- { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
- { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
- { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
- { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
- { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
- { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
- { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
- { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
- { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
- { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
- { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
- { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
- { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
- { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
- { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
- { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
- { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
- { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
+ { X86::VORPDYrr, X86::VORPDYrm, 0 },
+ { X86::VORPSYrr, X86::VORPSYrm, 0 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
// AVX2 foldable instructions
- { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
- { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
- { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
- { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
- { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
- { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
- { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
- { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
- { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
- { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
- { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
- { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
- { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
- { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 },
- { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
- { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 },
- { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
- { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
- { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
- { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
- { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
- { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
- { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
- { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
- { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
- { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
- { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
- { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
- { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
- { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
- { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
- { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
- { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
- { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
- { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
- { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
- { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
- { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
- { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
- { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
- { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
- { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
+ { X86::VPANDYrr, X86::VPANDYrm, 0 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
+ { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
+ { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
+ { X86::VPORYrr, X86::VPORYrm, 0 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
+ { X86::VPXORYrr, X86::VPXORYrm, 0 },
// FIXME: add AVX 256-bit foldable instructions
// FMA4 foldable patterns
@@ -1161,8 +1177,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
// BMI/BMI2 foldable instructions
+ { X86::ANDN32rr, X86::ANDN32rm, 0 },
+ { X86::ANDN64rr, X86::ANDN64rm, 0 },
{ X86::MULX32rr, X86::MULX32rm, 0 },
{ X86::MULX64rr, X86::MULX64rm, 0 },
+ { X86::PDEP32rr, X86::PDEP32rm, 0 },
+ { X86::PDEP64rr, X86::PDEP64rm, 0 },
+ { X86::PEXT32rr, X86::PEXT32rm, 0 },
+ { X86::PEXT64rr, X86::PEXT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -2848,6 +2870,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
@@ -3158,19 +3182,15 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
- case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m:
+ case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
case X86::DEC64_32r: case X86::DEC64_16r:
- case X86::DEC64_32m: case X86::DEC64_16m:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
- case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m:
+ case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
case X86::INC64_32r: case X86::INC64_16r:
- case X86::INC64_32m: case X86::INC64_16m:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@@ -3186,6 +3206,8 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ case X86::ANDN32rr: case X86::ANDN32rm:
+ case X86::ANDN64rr: case X86::ANDN64rm:
return true;
}
}
@@ -3508,43 +3530,44 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
/// to:
/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
///
-static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MI->getOperand(0).getReg();
- MI->setDesc(Desc);
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
// implicit operands.
- MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ assert(MIB->getOperand(1).getReg() == Reg &&
+ MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
return true;
}
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {
case X86::SETB_C8r:
- return Expand2AddrUndef(MI, get(X86::SBB8rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
- return Expand2AddrUndef(MI, get(X86::SBB16rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
- return Expand2AddrUndef(MI, get(X86::SBB32rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
- return Expand2AddrUndef(MI, get(X86::SBB64rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB64rr));
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
- return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
+ return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::V_SETALLONES:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
- return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr));
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
@@ -3570,9 +3593,10 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
unsigned NumAddrOps = MOs.size();
for (unsigned i = 0; i != NumAddrOps; ++i)
MIB.addOperand(MOs[i]);
@@ -3596,9 +3620,10 @@ static MachineInstr *FuseInst(MachineFunction &MF,
unsigned Opcode, unsigned OpNo,
const SmallVectorImpl<MachineOperand> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -3845,8 +3870,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3887,8 +3912,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -4138,7 +4163,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(DataMI);
+ MachineInstrBuilder MIB(MF, DataMI);
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
@@ -4644,13 +4669,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::DIVSSrr:
case X86::DIVSSrr_Int:
case X86::SQRTPDm:
- case X86::SQRTPDm_Int:
case X86::SQRTPDr:
- case X86::SQRTPDr_Int:
case X86::SQRTPSm:
- case X86::SQRTPSm_Int:
case X86::SQRTPSr:
- case X86::SQRTPSr_Int:
case X86::SQRTSDm:
case X86::SQRTSDm_Int:
case X86::SQRTSDr:
@@ -4669,13 +4690,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VDIVSSrr:
case X86::VDIVSSrr_Int:
case X86::VSQRTPDm:
- case X86::VSQRTPDm_Int:
case X86::VSQRTPDr:
- case X86::VSQRTPDr_Int:
case X86::VSQRTPSm:
- case X86::VSQRTPSm_Int:
case X86::VSQRTPSr:
- case X86::VSQRTPSr_Int:
case X86::VSQRTSDm:
case X86::VSQRTSDm_Int:
case X86::VSQRTSDr:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 921ec0d84b..ba5dfae137 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -262,9 +262,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
-def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>;
-def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
-def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>;
+def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
+def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
+def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -1066,7 +1066,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1156,24 +1156,26 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi16 addr:$src1), GR16:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, OpSize, TB, Requires<[FastBTMem]>;
-def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi32 addr:$src1), GR32:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB, Requires<[FastBTMem]>;
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi64 addr:$src1), GR64:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB;
+let mayLoad = 1, hasSideEffects = 0 in {
+ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi16 addr:$src1), GR16:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, OpSize, TB, Requires<[FastBTMem]>;
+ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi32 addr:$src1), GR32:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB, Requires<[FastBTMem]>;
+ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi64 addr:$src1), GR64:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB;
+}
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -1204,7 +1206,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
-
+let hasSideEffects = 0 in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1212,6 +1214,8 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1219,6 +1223,8 @@ def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1226,6 +1232,8 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1233,6 +1241,7 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
@@ -1241,6 +1250,8 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1248,6 +1259,8 @@ def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1255,6 +1268,8 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1262,6 +1277,7 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
@@ -1270,6 +1286,8 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1277,6 +1295,8 @@ def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1284,6 +1304,8 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+
+let mayLoad = 1, mayStore = 1 in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1291,6 +1313,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+} // hasSideEffects = 0
} // Defs = [EFLAGS]
@@ -1303,34 +1327,34 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
InstrItinClass itin> {
let Constraints = "$val = $dst" in {
- def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin>;
- def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin>;
+ def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
@@ -1509,10 +1533,10 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
Requires<[In32BitMode]>;
// Adjust RPL Field of Segment Selector
-def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
Requires<[In32BitMode]>;
-def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
@@ -1628,26 +1652,26 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
PatFrag ld_frag> {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+ [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V;
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+ [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>,
T8, VEX_4V;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
- X86blsr_flag, loadi32>;
+ X86blsr, loadi32>;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
- X86blsr_flag, loadi64>, VEX_W;
+ X86blsr, loadi64>, VEX_W;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
- X86blsmsk_flag, loadi32>;
+ X86blsmsk, loadi32>;
defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
- X86blsmsk_flag, loadi64>, VEX_W;
+ X86blsmsk, loadi64>, VEX_W;
defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
- X86blsi_flag, loadi32>;
+ X86blsi, loadi32>;
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
- X86blsi_flag, loadi64>, VEX_W;
+ X86blsi, loadi64>, VEX_W;
}
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -1915,6 +1939,8 @@ def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcom", (COM_FST0r ST1)>;
+def : InstAlias<"fcomp", (COMP_FST0r ST1)>;
def : InstAlias<"fcomi", (COM_FIr ST1)>;
def : InstAlias<"fcompi", (COM_FIPr ST1)>;
def : InstAlias<"fucom", (UCOM_Fr ST1)>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1912a936ce..89149c65bf 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -203,9 +203,8 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1,
- bit rr_hasSideEffects = 0> {
- let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
+ bit Is2Addr = 1> {
+ let isCommutable = 1, hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -218,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
pat_rm, IIC_DEFAULT, d>;
}
-/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
-multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- string asm, string SSEVer, string FPSizeStr,
- X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, OpndItins itins, bit Is2Addr = 1> {
- def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
- def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
-}
-
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
@@ -481,7 +459,7 @@ def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
VEX_LIG;
// For the disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
"movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -519,7 +497,7 @@ let Constraints = "$src1 = $dst" in {
"movsd\t{$src2, $dst|$dst, $src2}">, XD;
// For the disassembler
- let isCodeGenOnly = 1 in {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
"movss\t{$src2, $dst|$dst, $src2}", [],
@@ -870,7 +848,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -944,7 +922,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
IIC_SSE_MOVU_P_MR>;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1132,34 +1110,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- SDNode psnode, SDNode pdnode, string base_opc,
- string asm_opr, InstrItinClass itin> {
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [(set RC:$dst,
- (psnode RC:$src1,
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
itin, SSEPackedSingle>, TB;
def PDrm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (pdnode RC:$src1,
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
itin, SSEPackedDouble>, TB, OpSize;
+
}
-let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1257,14 +1242,8 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
-}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
}
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -1457,7 +1436,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1465,26 +1444,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
let Predicates = [HasAVX] in {
@@ -1511,27 +1507,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD, REX_W;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -1566,27 +1584,27 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
@@ -1596,13 +1614,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss", SSE_CVT_Scalar>, XS;
+ "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
"cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd", SSE_CVT_Scalar>, XD;
+ "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
"cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
@@ -1616,40 +1634,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ "cvttss2si", SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ "cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
@@ -1666,6 +1684,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+
/// SSE 2 Only
// Convert scalar double to scalar single
@@ -2657,10 +2709,8 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit IsCommutable, bit Is2Addr> {
let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
@@ -2679,40 +2729,30 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // ExeDomain = SSEPackedInt
-// These are ordered here for pattern ordering requirements with the fp versions
+multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
+ ValueType OpVT128, ValueType OpVT256,
+ OpndItins itins, bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
+ VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
-let Predicates = [HasAVX] in {
-defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
+ memopv2i64, i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
+ OpVT256, VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
-let Constraints = "$src1 = $dst" in {
-defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0>;
-} // Constraints = "$src1 = $dst"
+// These are ordered here for pattern ordering requirements with the fp versions
-let Predicates = [HasAVX2] in {
-defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
+defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
+ SSE_BIT_ITINS_P, 0>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2757,6 +2797,20 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
+ defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+
+ defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V, VEX_L;
+
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
// are all promoted to v2i64, and the patterns are covered by the int
// version. This is needed in SSE only, because v2i64 isn't supported on
@@ -2764,7 +2818,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2773,6 +2827,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(memopv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
+
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -2789,31 +2844,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
-///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
- defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
-
- defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
- TB, OpSize, VEX_4V, VEX_L;
-}
-
-// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
-
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
@@ -2848,26 +2878,32 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SizeItins itins> {
+let Predicates = [HasAVX] in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
+
+ defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, VR256, v8f32, f256mem, memopv8f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
+ defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, VR256, v4f64, f256mem, memopv4f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
- TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s, 1>, TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d, 1>, TB, OpSize;
}
-
-multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- SizeItins itins> {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
- TB, VEX_L;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
- TB, OpSize, VEX_L;
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2881,116 +2917,69 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, itins.s, Is2Addr>,
- TB;
-
- defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
- SizeItins itins> {
- defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, itins.s, 0>, TB, VEX_L;
-
- defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
+let isCodeGenOnly = 1 in {
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
}
-// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
- VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
- VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
}
let isCodeGenOnly = 1 in {
defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
}
@@ -3021,6 +3010,26 @@ def SSE_RCPS : OpndItins<
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
@@ -3040,49 +3049,115 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
}
-/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
let mayLoad = 1 in {
- def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1,f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>;
}
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
+ itins.rr>, VEX;
+ def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
}
-/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
+ Intrinsic V4F32Int, Intrinsic V8F32Int,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, VEX;
+ def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int VR256:$src))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
@@ -3093,22 +3168,29 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
itins.rm>;
}
-/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
-multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
- def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))],
- itins.rr>, VEX_L;
- def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ let mayLoad = 1 in {
+ def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1,f64mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG;
+ }
+}
+
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
@@ -3125,26 +3207,32 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
}
-/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-let hasSideEffects = 0 in
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in {
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
-}
-
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
+ itins.rr>, VEX;
+ def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ itins.rm>, VEX;
+ def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L;
+ def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>, VEX, VEX_L;
+}
+
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
@@ -3153,82 +3241,24 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
}
-/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
-/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))],
- itins.rr>;
- def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
- itins.rm>;
-}
-
-/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))],
- itins.rr>, VEX_L;
- def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
-let Predicates = [HasAVX] in {
- // Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
- sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
-
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
- SSE_SQRTP>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
- SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
- SSE_SQRTP>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
- SSE_SQRTP>,
- VEX;
-
- // Reciprocal approximations. Note that these typically require refinement
- // in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
- SSE_SQRTP>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTP>, VEX;
-
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
- SSE_RCPP>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
- SSE_RCPP>, VEX;
-}
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
+ int_x86_avx_rcp_ps_256, SSE_RCPP>;
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
@@ -3283,59 +3313,11 @@ let Predicates = [HasAVX] in {
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
-// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
- SSE_SQRTS>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
-
-/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
-multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic F32Int, OpndItins itins> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
- let Constraints = "$src1 = $dst" in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
- }
-}
-
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTS>;
let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
-}
-
-defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
-let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
(RCPSSr_Int VR128:$src, VR128:$src)>;
}
@@ -3508,7 +3490,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3571,7 +3553,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -3650,6 +3632,24 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
itins.rm>;
}
+multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ Intrinsic IntId256, OpndItins itins,
+ bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
+ VR128, memopv2i64, i128mem, itins,
+ IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
+ i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
+ VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
+}
+
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, SDNode OpNode,
SDNode OpNode2, RegisterClass RC,
@@ -3679,7 +3679,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
}
-/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -3702,249 +3702,75 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // ExeDomain = SSEPackedInt
-// 128-bit Integer Arithmetic
+defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 0>;
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
-let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
- VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+// Intrinsic forms
+defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
+ int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
+defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
+defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+
+let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
-
-// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1, 0>, VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
+let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-
-// Intrinsic forms
-defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
- VR256, memopv4i64, i256mem,
- SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P>;
+let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
-// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-
-} // Constraints = "$src1 = $dst"
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -4126,186 +3952,106 @@ let Predicates = [UseSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
-defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>;
+defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>;
+defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-}
-
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def Yri : Ii8<0x70, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
-def Ymi : Ii8<0x70, MRMSrcMem,
- (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))]>;
-}
-} // ExeDomain = SSEPackedInt
-
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
+ SDNode OpNode> {
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
-
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX;
+ def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
- TB, OpSize, VEX,VEX_L;
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
- XS, VEX, VEX_L;
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
- XD, VEX, VEX_L;
+ def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, VEX_L;
+ def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+ def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+ def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+}
+}
+} // ExeDomain = SSEPackedInt
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize;
+defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
+defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -5844,6 +5590,55 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXWDYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXDQYrm addr:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQYrm addr:$src)>;
+}
+
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
@@ -5858,6 +5653,15 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5868,6 +5672,34 @@ let Predicates = [UseSSE41] in {
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5928,6 +5760,44 @@ let Predicates = [HasAVX] in {
(VPMOVZXDQrm addr:$src)>;
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
(VPMOVZXDQrm addr:$src)>;
+
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6267,6 +6137,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
// Operation, reg.
+ let hasSideEffects = 0 in
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6300,6 +6171,7 @@ let ExeDomain = GenericDomain in {
OpSize;
// Operation, reg.
+ let hasSideEffects = 0 in
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6621,67 +6493,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0>, VEX_4V;
- defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
- 0>, VEX_4V;
- defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
- 0>, VEX_4V;
- defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
- 0>, VEX_4V;
- defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
- 0>, VEX_4V;
- defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
- 0>, VEX_4V;
- defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
- 0>, VEX_4V;
- defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
- 0>, VEX_4V;
- defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
- 0>, VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw>, VEX_4V, VEX_L;
- defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
- int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
- defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
- int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
- defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
- int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
- defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
- int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
- defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
- int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
- defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
- int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
- defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
- int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
- defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
- int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
- defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
- defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
- defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
- defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
- defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
- defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
- defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
- defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
-}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6705,6 +6516,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 893488c159..1185941d34 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -51,6 +51,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+let hasSideEffects = 0 in {
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
"shl{b}\t$dst", [], IIC_SR>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
@@ -59,8 +60,9 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t$dst", [], IIC_SR>;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", [], IIC_SR>;
+} // hasSideEffects = 0
} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst"
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
@@ -333,6 +335,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
// Rotate instructions
//===----------------------------------------------------------------------===//
+let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst" in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
@@ -455,6 +458,7 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
+} // hasSideEffects = 0
let Constraints = "$src1 = $dst" in {
// FIXME: provide shorter instructions when imm8 == 1
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 4b528f6153..066830c845 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -16,7 +16,7 @@
#include "X86Relocations.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"//TODO(dschuff):don't forget to remove these
#include "llvm/Support/Disassembler.h"
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 7b0f3434d0..f916327378 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -15,7 +15,7 @@
#define X86JITINFO_H
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 9b6d1d4c13..8528002f6d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -17,6 +17,7 @@
#include "X86COFFMachineModuleInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Type.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
index e850dcaf9e..0305a336e2 100644
--- a/lib/Target/X86/X86NaClRewriteFinalPass.cpp
+++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
new file mode 100644
index 0000000000..83e75ea994
--- /dev/null
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -0,0 +1,212 @@
+//===-------- X86PadShortFunction.cpp - pad short functions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will pad short functions to prevent
+// a stall if a function returns before the return address is ready. This
+// is needed for some Intel Atom processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+ struct VisitedBBInfo {
+ // HasReturn - Whether the BB contains a return instruction
+ bool HasReturn;
+
+ // Cycles - Number of cycles until return if HasReturn is true, otherwise
+ // number of cycles until end of the BB
+ unsigned int Cycles;
+
+ VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo(bool HasReturn, unsigned int Cycles)
+ : HasReturn(HasReturn), Cycles(Cycles) {}
+ };
+
+ struct PadShortFunc : public MachineFunctionPass {
+ static char ID;
+ PadShortFunc() : MachineFunctionPass(ID)
+ , Threshold(4), TM(0), TII(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Atom pad short functions";
+ }
+
+ private:
+ void findReturns(MachineBasicBlock *MBB,
+ unsigned int Cycles = 0);
+
+ bool cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles);
+
+ void addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd);
+
+ const unsigned int Threshold;
+
+ // ReturnBBs - Maps basic blocks that return to the minimum number of
+ // cycles until the return, starting from the entry block.
+ DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+
+ // VisitedBBs - Cache of previously visited BBs.
+ DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ };
+
+ char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+ return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+ const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize)) {
+ return false;
+ }
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+
+ // Search through basic blocks and mark the ones that have early returns
+ ReturnBBs.clear();
+ VisitedBBs.clear();
+ findReturns(MF.begin());
+
+ bool MadeChange = false;
+
+ MachineBasicBlock *MBB;
+ unsigned int Cycles = 0;
+
+ // Pad the identified basic blocks with NOOPs
+ for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
+ I != ReturnBBs.end(); ++I) {
+ MBB = I->first;
+ Cycles = I->second;
+
+ if (Cycles < Threshold) {
+ // BB ends in a return. Skip over any DBG_VALUE instructions
+ // trailing the terminator.
+ assert(MBB->size() > 0 &&
+ "Basic block should contain at least a RET but is empty");
+ MachineBasicBlock::iterator ReturnLoc = --MBB->end();
+
+ while (ReturnLoc->isDebugValue())
+ --ReturnLoc;
+ assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
+ "Basic block does not end with RET");
+
+ addPadding(MBB, ReturnLoc, Threshold - Cycles);
+ NumBBsPadded++;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// findReturn - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
+ // If this BB has a return, note how many cycles it takes to get there.
+ bool hasReturn = cyclesUntilReturn(MBB, Cycles);
+ if (Cycles >= Threshold)
+ return;
+
+ if (hasReturn) {
+ ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
+ return;
+ }
+
+ // Follow branches in BB and look for returns
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
+ I != MBB->succ_end(); ++I) {
+ if (*I == MBB)
+ continue;
+ findReturns(*I, Cycles);
+ }
+}
+
+/// cyclesUntilReturn - return true if the MBB has a return instruction,
+/// and return false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
+bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles) {
+ // Return cached result if BB was previously visited
+ DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
+ = VisitedBBs.find(MBB);
+ if (it != VisitedBBs.end()) {
+ VisitedBBInfo BBInfo = it->second;
+ Cycles += BBInfo.Cycles;
+ return BBInfo.HasReturn;
+ }
+
+ unsigned int CyclesToEnd = 0;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin();
+ MBBI != MBB->end(); ++MBBI) {
+ MachineInstr *MI = MBBI;
+ // Mark basic blocks with a return instruction. Calls to other
+ // functions do not count because the called function will be padded,
+ // if necessary.
+ if (MI->isReturn() && !MI->isCall()) {
+ VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return true;
+ }
+
+ CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ }
+
+ VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return false;
+}
+
+/// addPadding - Add the given number of NOOP instructions to the function
+/// just prior to the return at MBBI
+void PadShortFunc::addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ while (NOOPsToAdd-- > 0) {
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ }
+}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1b16d6d8f7..f32bd664e1 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -28,8 +28,9 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -37,7 +38,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
@@ -61,10 +61,12 @@ extern cl::opt<bool> FlagRestrictR15;
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP,
+ : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP)),
TM(tm), TII(tii) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
@@ -441,7 +443,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f9ac..d99d085298 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for inorder and nonzero for OOO processors.
+//
// The GenericModel contains no instruciton itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0;
let LoadLatency = 4;
let HighLatency = 10;
+ let ILPWindow = 10;
}
include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 87102614cc..1e5f2d6c9a 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel {
// OperandCycles may be used for expected latency.
let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 8c7e09c324..7814ca41cf 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -14,7 +14,7 @@
#define DEBUG_TYPE "x86-selectiondag-info"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 52175bcd6c..b517019e85 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -14,7 +14,7 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
@@ -358,6 +358,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, UseLeaForSP(false)
, HasSlowDivide(false)
, PostRAScheduler(false)
+ , PadShortFunctions(false)
, stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 5e5485ea49..b5e3ec6270 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -15,7 +15,7 @@
#define X86SUBTARGET_H
#include "llvm/ADT/Triple.h"
-#include "llvm/CallingConv.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -146,6 +146,10 @@ protected:
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
+ /// PadShortFunctions - True if the short functions should be padded to prevent
+ /// a stall when returning too early.
+ bool PadShortFunctions;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -234,6 +238,7 @@ public:
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
+ bool padShortFunctions() const { return PadShortFunctions; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 38a4fb12ce..c293bce4f5 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -48,10 +48,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
"e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
"n8:16:32-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this),
+ JITInfo(*this) {
}
void X86_64TargetMachine::anchor() { }
@@ -68,10 +67,9 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
"e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo){
+ TSInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
@@ -126,6 +124,19 @@ X86EarlyIfConv("x86-early-ifcvt",
cl::desc("Enable early if-conversion on X86"));
//===----------------------------------------------------------------------===//
+// X86 Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our X86 pass. This
+ // allows the X86 pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createX86TargetTransformInfoPass(this));
+}
+
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -196,6 +207,12 @@ bool X86PassConfig::addPreEmitPass() {
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().padShortFunctions()) {
+ addPass(createX86PadShortFunctions());
+ ShouldPrint = true;
+ }
+
// @LOCALMOD-START
if (getX86Subtarget().isTargetNaCl()) {
addPass(createX86NaClRewritePass());
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 792f721e76..174d391831 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -21,10 +21,9 @@
#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -65,6 +64,9 @@ public:
return &InstrItins;
}
+ /// \brief Register X86 analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Set up the pass pipeline.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -78,11 +80,9 @@ class X86_32TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -101,12 +101,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -115,11 +109,9 @@ class X86_64TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- X86ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -138,12 +130,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 4a1788a85e..3347449cb7 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,17 +8,13 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
#include "X86Subtarget.h" // @LOCALMOD
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
new file mode 100644
index 0000000000..675c896d70
--- /dev/null
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -0,0 +1,383 @@
+//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't havve a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+ const X86TargetMachine *TM;
+ const X86Subtarget *ST;
+ const X86TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ X86TTI(const X86TargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeX86TTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+ "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+ return new X86TTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct X86CostTblEntry {
+ int ISD;
+ MVT Type;
+ unsigned Cost;
+};
+}
+
+static int
+FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
+namespace {
+struct X86TypeConversionCostTblEntry {
+ int ISD;
+ MVT Dst;
+ MVT Src;
+ unsigned Cost;
+};
+}
+
+static int
+FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
+ int ISD, MVT Dst, MVT Src) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
+X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+ // call ST->hasSSE3() instead of ST->hasSSE4().
+ return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasSSE1())
+ return 0;
+
+ if (ST->is64Bit())
+ return 16;
+ return 8;
+}
+
+unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAVX()) return 256;
+ if (ST->hasSSE1()) return 128;
+ return 0;
+ }
+
+ if (ST->is64Bit())
+ return 64;
+ return 32;
+
+}
+
+unsigned X86TTI::getMaximumUnrollFactor() const {
+ if (ST->isAtom())
+ return 1;
+
+ // Sandybridge and Haswell have multiple execution ports and pipelined
+ // vector units.
+ if (ST->hasAVX())
+ return 4;
+
+ return 2;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const X86CostTblEntry AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::MUL, MVT::v4i64, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST->hasAVX()) {
+ int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+ LT.second);
+ if (Idx != -1)
+ return LT.first * AVX1CostTable[Idx].Cost;
+ }
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only estimate the cost of reverse shuffles.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+ // Multiple by the number of parts.
+ return Cost * LT.first;
+}
+
+unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = FindInConvertTable(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const X86CostTblEntry SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+ };
+
+ static const X86CostTblEntry AVX1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f64, 1 },
+ { ISD::SETCC, MVT::v8f32, 1 },
+ // AVX1 does not support 8-wide integer compare.
+ { ISD::SETCC, MVT::v4i64, 4 },
+ { ISD::SETCC, MVT::v8i32, 4 },
+ { ISD::SETCC, MVT::v16i16, 4 },
+ { ISD::SETCC, MVT::v32i8, 4 },
+ };
+
+ static const X86CostTblEntry AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+ };
+
+ if (ST->hasAVX2()) {
+ int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX2CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasAVX()) {
+ int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // Floating point scalars are already located in index #0.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ }
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // On Sandybridge 256bit load/stores are double pumped
+ // (but not on Haswell).
+ if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+ Cost*=2;
+
+ return Cost;
+}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index ca94f03a64..099ad390d2 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS XCore.td)
tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM XCoreGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv)
@@ -15,6 +16,7 @@ add_llvm_target(XCoreCodeGen
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
XCoreMachineFunctionInfo.cpp
+ XCoreMCInstLower.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -24,5 +26,7 @@ add_llvm_target(XCoreCodeGen
add_dependencies(LLVMXCoreCodeGen intrinsics_gen)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/Disassembler/CMakeLists.txt b/lib/Target/XCore/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000000..cdc5d993b8
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMXCoreDisassembler
+ XCoreDisassembler.cpp
+ )
+
+add_dependencies(LLVMXCoreDisassembler XCoreCommonTableGen)
diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000000..028de2cb34
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreDisassembler
+parent = XCore
+required_libraries = MC Support XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/Disassembler/Makefile b/lib/Target/XCore/Disassembler/Makefile
new file mode 100644
index 0000000000..4caffdd1da
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/Disassembler/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDisassembler
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
new file mode 100644
index 0000000000..094f18ceee
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -0,0 +1,324 @@
+//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the XCore Disassembler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// \brief A disassembler class for XCore.
+class XCoreDisassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
+ MCDisassembler(STI), RegInfo(Info) {}
+
+ /// \brief See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+}
+
+static bool readInstruction16(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint16_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 2 Bytes of data.
+ if (region.readBytes(address, 2, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 16-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ return true;
+}
+
+static bool readInstruction32(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint32_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 4 Bytes of data.
+ if (region.readBytes(address, 4, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ return true;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus Decode2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+#include "XCoreGenDisassemblerTables.inc"
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 11)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val > 11)
+ return MCDisassembler::Fail;
+ static unsigned Values[] = {
+ 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
+ };
+ Inst.addOperand(MCOperand::CreateImm(Values[Val]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5) +
+ fieldFromInstruction(Insn, 5, 1) * 5 - 27;
+ if (Combined >= 9)
+ return MCDisassembler::Fail;
+
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = Combined / 3;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op2));
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ }
+ return S;
+}
+
+MCDisassembler::DecodeStatus
+XCoreDisassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
+ uint16_t insn16;
+
+ if (!readInstruction16(Region, Address, Size, insn16)) {
+ return Fail;
+ }
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus Result = decodeInstruction(DecoderTable16, instr, insn16,
+ Address, this, STI);
+ if (Result != Fail) {
+ Size = 2;
+ return Result;
+ }
+
+ uint32_t insn32;
+
+ if (!readInstruction32(Region, Address, Size, insn32)) {
+ return Fail;
+ }
+
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTable32, instr, insn32, Address, this, STI);
+ if (Result != Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return Fail;
+}
+
+namespace llvm {
+ extern Target TheXCoreTarget;
+}
+
+static MCDisassembler *createXCoreDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new XCoreDisassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeXCoreDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheXCoreTarget,
+ createXCoreDisassembler);
+}
diff --git a/lib/Target/XCore/InstPrinter/CMakeLists.txt b/lib/Target/XCore/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000000..930e733cd7
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreAsmPrinter
+ XCoreInstPrinter.cpp
+ )
+
+add_dependencies(LLVMXCoreAsmPrinter XCoreCommonTableGen)
diff --git a/lib/Target/XCore/InstPrinter/LLVMBuild.txt b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000000..8750bc7ace
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreAsmPrinter
+parent = XCore
+required_libraries = MC Support
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/InstPrinter/Makefile b/lib/Target/XCore/InstPrinter/Makefile
new file mode 100644
index 0000000000..1c1c61299c
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreAsmPrinter
+
+# Hack: we need to include 'main' xcore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
new file mode 100644
index 0000000000..1592351c38
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an XCore MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCoreInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void XCoreInstPrinter::
+printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT");
+}
+
+void XCoreInstPrinter::
+printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT32");
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ } else {
+ SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+ assert(SRE && "Unexpected MCExpr type.");
+ }
+ assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+}
+
+void XCoreInstPrinter::
+printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ printExpr(Op.getExpr(), O);
+}
+
+void XCoreInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
new file mode 100644
index 0000000000..772c515b5c
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -0,0 +1,44 @@
+//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the XCoreInstPrinter class,
+/// which is used to print XCore MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTPRINTER_H
+#define XCOREINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+class XCoreInstPrinter : public MCInstPrinter {
+public:
+ XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+private:
+ void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
+ void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 53b4a9e3f5..59e64ad085 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -16,13 +16,14 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCTargetDesc TargetInfo
+subdirectories = Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = XCore
parent = Target
has_asmprinter = 1
+has_disassembler = 1
[component_1]
type = Library
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index a80c939b43..8213f9e428 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = XCoreDesc
parent = XCore
-required_libraries = MC XCoreInfo
+required_libraries = MC XCoreAsmPrinter XCoreInfo
add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index bbfdd4356f..b5b072dcbd 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "XCoreMCTargetDesc.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -69,6 +70,15 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
+static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new XCoreInstPrinter(MAI, MII, MRI);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC asm info.
@@ -87,4 +97,8 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
createXCoreMCSubtargetInfo);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
+ createXCoreMCInstPrinter);
}
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index b823c4ed37..92ddc88608 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -14,10 +14,10 @@ TARGET = XCore
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \
XCoreGenAsmWriter.inc \
- XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
- XCoreGenSubtargetInfo.inc
+ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
+ XCoreGenDisassemblerTables.inc XCoreGenSubtargetInfo.inc
-DIRS = TargetInfo MCTargetDesc
+DIRS = Disassembler InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 9a0971d1e4..00e34e04fb 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "XCore.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 04a1dd5e95..e9a6d88fd6 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -41,7 +41,13 @@ def : Proc<"xs1b-generic", []>;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def XCoreAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
def XCore : Target {
// Pull in Instruction Info:
let InstructionSet = XCoreInstrInfo;
+ let AssemblyWriters = [XCoreAsmWriter];
}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 6760641efe..0d146ba4d9 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -14,7 +14,9 @@
#define DEBUG_TYPE "asm-printer"
#include "XCore.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMCInstLower.h"
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
#include "llvm/ADT/SmallString.h"
@@ -25,14 +27,15 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -52,16 +55,17 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
+ XCoreMCInstLower MCInstLowering;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
public:
explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
+ MCInstLowering(*this) {}
virtual const char *getPassName() const {
return "XCore Assembly Printer";
}
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive = ".jmptable");
void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
@@ -75,18 +79,14 @@ namespace {
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
virtual void EmitGlobalVariable(const GlobalVariable *GV);
- void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
- static const char *getRegisterName(unsigned RegNo);
-
void EmitFunctionEntryLabel();
void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyStart();
void EmitFunctionBodyEnd();
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
-#include "XCoreGenAsmWriter.inc"
-
void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
assert(((GV->hasExternalLinkage() ||
GV->hasWeakLinkage()) ||
@@ -171,12 +171,16 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer.EmitZeros(4 - Size, 0);
+ OutStreamer.EmitZeros(4 - Size);
// Mark the end of the global
OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
}
+void XCoreAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+}
+
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
/// the last basic block in the function.
void XCoreAsmPrinter::EmitFunctionBodyEnd() {
@@ -192,17 +196,6 @@ void XCoreAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
-void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << "+";
- printOperand(MI, opNum+1, O);
-}
-
void XCoreAsmPrinter::
printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive) {
@@ -225,7 +218,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << XCoreInstPrinter::getRegisterName(MO.getReg());
break;
case MachineOperand::MO_Immediate:
O << MO.getImm();
@@ -270,7 +263,7 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
}
-printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, O);
return false;
}
@@ -317,15 +310,30 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case XCore::ADD_2rus:
if (MI->getOperand(2).getImm() == 0) {
- O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
- << getRegisterName(MI->getOperand(1).getReg());
+ O << "\tmov "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
OutStreamer.EmitRawText(O.str());
return;
}
break;
+ case XCore::BR_JT:
+ case XCore::BR_JT32:
+ O << "\tbru "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()) << '\n';
+ if (MI->getOpcode() == XCore::BR_JT)
+ printInlineJT(MI, 0, O);
+ else
+ printInlineJT32(MI, 0, O);
+ O << '\n';
+ OutStreamer.EmitRawText(O.str());
+ return;
}
- printInstruction(MI, O);
- OutStreamer.EmitRawText(O.str());
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ OutStreamer.EmitInstruction(TmpInst);
}
// Force static initialization.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 9257226f80..bb9c77a32f 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
@@ -100,11 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
bool FP = hasFP(MF);
const AttributeSet &PAL = MF.getFunction()->getAttributes();
- for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I)
- if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) {
- loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
- break;
- }
+ if (PAL.hasAttrSomewhere(Attribute::Nest))
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
// Work out frame sizes.
int FrameSize = MFI->getStackSize();
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 459664bf58..472ce6305c 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -13,18 +13,18 @@
#include "XCore.h"
#include "XCoreTargetMachine.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 391660a095..6e894acedb 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -19,7 +19,6 @@
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
#include "XCoreTargetObjectFile.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,11 +27,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 1963a70fb3..44ac45c72f 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -10,7 +10,7 @@
//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//
-class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstXCore<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
field bits<32> Inst;
@@ -19,102 +19,143 @@ class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
dag InOperandList = ins;
let AsmString = asmstr;
let Pattern = pattern;
+ let Size = sz;
+ field bits<32> SoftFail = 0;
}
// XCore pseudo instructions format
class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern>;
+ : InstXCore<0, outs, ins, asmstr, pattern> {
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Instruction formats
//===----------------------------------------------------------------------===//
class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<2, outs, ins, asmstr, pattern> {
}
class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<2, outs, ins, asmstr, pattern> {
}
class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<2, outs, ins, asmstr, pattern> {
}
class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<2, outs, ins, asmstr, pattern> {
}
class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<2, outs, ins, asmstr, pattern> {
}
class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+}
+
+class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "Decode2RInstruction";
+}
+
+// 2R with first operand as both a source and a destination.
+class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RSrcDstInstruction";
+}
+
+// Same as 2R with last two operands swapped
+class _FR2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeR2RInstruction";
}
-class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FRUS<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeRUSInstruction";
}
-class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with bitp operand
+class _FRUSBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSBitpInstruction";
}
-class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with first operand as both a source and a destination and a bitp second
+// operand
+class _FRUSSrcDstBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSSrcDstBitpInstruction";
}
-class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FL2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{9-5};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{4-1};
+
+ let Inst{15-11} = 0b11111;
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeL2RInstruction";
+}
+
+// Same as L2R with last two operands swapped
+class _FLR2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _FL2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeLR2RInstruction";
+}
+
+class _F1R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+
+ let Inst{15-11} = opc{5-1};
+ let Inst{10-5} = 0b111111;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = a;
}
-class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{9-5};
+ let Inst{10-5} = 0b111111;
+ let Inst{4-0} = opc{4-0};
}
class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+ : InstXCore<4, outs, ins, asmstr, pattern> {
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 3e7666bdb9..95b076fdb4 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -344,10 +344,9 @@ multiclass FU10_LU10_np<string OpcStr> {
// Two operand short
-class F2R_np<string OpcStr> : _F2R<
- (outs GRRegs:$dst), (ins GRRegs:$b),
- !strconcat(OpcStr, " $dst, $b"),
- []>;
+class F2R_np<bits<6> opc, string OpcStr> :
+ _F2R<opc, (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"), []>;
// Two operand long
@@ -357,23 +356,23 @@ class F2R_np<string OpcStr> : _F2R<
let Defs = [SP], Uses = [SP] in {
def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
- "${:comment} ADJCALLSTACKDOWN $amt",
+ "# ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "${:comment} ADJCALLSTACKUP $amt1",
+ "# ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDWFI $dst, $addr",
+ "# LDWFI $dst, $addr",
[(set GRRegs:$dst, (load ADDRspii:$addr))]>;
def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDAWFI $dst, $addr",
+ "# LDAWFI $dst, $addr",
[(set GRRegs:$dst, ADDRspii:$addr)]>;
def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
- "${:comment} STWFI $src, $addr",
+ "# STWFI $src, $addr",
[(store GRRegs:$src, ADDRspii:$addr)]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -381,7 +380,7 @@ def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
let usesCustomInserter = 1 in {
def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
(ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
- "${:comment} SELECT_CC PSEUDO!",
+ "# SELECT_CC PSEUDO!",
[(set GRRegs:$dst,
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
@@ -753,210 +752,210 @@ def BL_lu10 : _FLU10<
// Two operand short
// TODO eet, eef, tsetmr
-def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "not $dst, $b",
- [(set GRRegs:$dst, (not GRRegs:$b))]>;
+def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>;
-def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "neg $dst, $b",
- [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+def NEG : _F2R<0b100100, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
let Constraints = "$src1 = $dst" in {
-def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "andnot $dst, $src2",
- [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+def SEXT_rus :
+ _FRUSSrcDstBitp<0b001101, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r :
+ _F2RSrcDst<0b001100, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ZEXT_rus :
+ _FRUSSrcDstBitp<0b010001, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r :
+ _F2RSrcDst<0b010000, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ANDNOT_2r :
+ _F2RSrcDst<0b001010, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
}
let isReMaterializable = 1, neverHasSideEffects = 1 in
-def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
- "mkmsk $dst, $size",
- []>;
+def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size", []>;
-def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
- "mkmsk $dst, $size",
- [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
+def MKMSK_2r : _F2R<0b101000, (outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
-def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
- "getr $dst, $type",
- [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+def GETR_rus : _FRUS<0b100000, (outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
-def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "getts $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+def GETTS_2r : _F2R<0b001110, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
-def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setpt res[$r], $val",
- [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+def SETPT_2r : _FR2R<0b001111, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+def OUTCT_2r : _F2R<0b010010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+def OUTCT_rus : _FRUS<0b010011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
-def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outt res[$r], $val",
- [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+def OUTT_2r : _FR2R<0b000011, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
-def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "out res[$r], $val",
- [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+def OUT_2r : _FR2R<0b101010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
let Constraints = "$src = $dst" in
-def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "outshr res[$r], $src",
- [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
- GRRegs:$src))]>;
+def OUTSHR_2r :
+ _F2RSrcDst<0b101011, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
-def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "inct $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+def INCT_2r : _F2R<0b100001, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
-def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "int $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+def INT_2r : _F2R<0b100011, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
-def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def IN_2r : _F2R<0b101100, (outs GRRegs:$dst), (ins GRRegs:$r),
"in $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
let Constraints = "$src = $dst" in
-def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "inshr $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
- GRRegs:$src))]>;
+def INSHR_2r :
+ _F2RSrcDst<0b101101, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
-def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+def CHKCT_2r : _F2R<0b110010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
-def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+def CHKCT_rus : _FRUSBitp<0b110011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
-def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTCT_2r : _F2R<0b101111, (outs GRRegs:$dst), (ins GRRegs:$src),
"testct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
-def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTWCT_2r : _F2R<0b110001, (outs GRRegs:$dst), (ins GRRegs:$src),
"testwct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
-def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setd res[$r], $val",
- [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
-def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def SETPSC_l2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r),
"getst $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
-def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITSP_2r : _F2R<0b000100, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:sp, $src",
[(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
-def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITPC_2r : _F2R<0b000000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:pc, $src",
[(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
-def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITCP_2r : _F2R<0b000110, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:cp, $src",
[(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
-def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITDP_2r : _F2R<0b000010, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:dp, $src",
[(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+def PEEK_2r : _F2R<0b101110, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
// Two operand long
// getd, testlcl
-def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "bitrev $dst, $src",
- [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
-def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "byterev $dst, $src",
- [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
-def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "clz $dst, $src",
- [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
-def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
-def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "settw res[$r], $val",
- [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+def SETTW_l2r : _FLR2R<0b0010011001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
-def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "get $dst, ps[$src]",
- [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+def GETPS_l2r : _FL2R<0b0001011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
-def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "set ps[$src1], $src2",
- [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+def SETPS_l2r : _FLR2R<0b0001111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
-def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITLR_l2r : _FL2R<0b0001011000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:lr, $src",
[(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
-def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setclk res[$src1], $src2",
- [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-
-def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setrdy res[$src1], $src2",
- [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
-
-def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setpsc res[$src1], $src2",
- [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
-
-def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "peek $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "endin $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
// One operand short
// TODO edu, eeu, waitet, waitef, tstart, clrtp
// setdp, setcp, setev, kcall
// dgetreg
-def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
- "msync res[$i]",
- [(int_xcore_msync GRRegs:$i)]>;
-def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
- "mjoin res[$i]",
- [(int_xcore_mjoin GRRegs:$i)]>;
+def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
+ "msync res[$a]",
+ [(int_xcore_msync GRRegs:$a)]>;
+def MJOIN_1r : _F1R<0b000101, (outs), (ins GRRegs:$a),
+ "mjoin res[$a]",
+ [(int_xcore_mjoin GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
-def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bau $addr",
- [(brind GRRegs:$addr)]>;
+def BAU_1r : _F1R<0b001001, (outs), (ins GRRegs:$a),
+ "bau $a",
+ [(brind GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
@@ -969,80 +968,80 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
[(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
let Defs=[SP], neverHasSideEffects=1 in
-def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
- "set sp, $src",
+def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a),
+ "set sp, $a",
[]>;
let hasCtrlDep = 1 in
-def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallt $src",
+def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
+ "ecallt $a",
[]>;
let hasCtrlDep = 1 in
-def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallf $src",
+def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a),
+ "ecallf $a",
[]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BLA_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bla $addr",
- [(XCoreBranchLink GRRegs:$addr)]>;
+def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a),
+ "bla $a",
+ [(XCoreBranchLink GRRegs:$a)]>;
}
-def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
- "syncr res[$r]",
- [(int_xcore_syncr GRRegs:$r)]>;
+def SYNCR_1r : _F1R<0b100001, (outs), (ins GRRegs:$a),
+ "syncr res[$a]",
+ [(int_xcore_syncr GRRegs:$a)]>;
-def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
- "freer res[$r]",
- [(int_xcore_freer GRRegs:$r)]>;
+def FREER_1r : _F1R<0b000100, (outs), (ins GRRegs:$a),
+ "freer res[$a]",
+ [(int_xcore_freer GRRegs:$a)]>;
let Uses=[R11] in {
-def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setv res[$r], r11",
- [(int_xcore_setv GRRegs:$r, R11)]>;
+def SETV_1r : _F1R<0b010001, (outs), (ins GRRegs:$a),
+ "setv res[$a], r11",
+ [(int_xcore_setv GRRegs:$a, R11)]>;
-def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setev res[$r], r11",
- [(int_xcore_setev GRRegs:$r, R11)]>;
+def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a),
+ "setev res[$a], r11",
+ [(int_xcore_setev GRRegs:$a, R11)]>;
}
-def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
- "eeu res[$r]",
- [(int_xcore_eeu GRRegs:$r)]>;
+def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a),
+ "eeu res[$a]",
+ [(int_xcore_eeu GRRegs:$a)]>;
// Zero operand short
// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
// dentsp, drestsp
-def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>;
let Defs = [R11] in {
-def GETID_0R : _F0R<(outs), (ins),
+def GETID_0R : _F0R<0b0001001110, (outs), (ins),
"get r11, id",
[(set R11, (int_xcore_getid))]>;
-def GETED_0R : _F0R<(outs), (ins),
+def GETED_0R : _F0R<0b0000111110, (outs), (ins),
"get r11, ed",
[(set R11, (int_xcore_geted))]>;
-def GETET_0R : _F0R<(outs), (ins),
+def GETET_0R : _F0R<0b0000111111, (outs), (ins),
"get r11, et",
[(set R11, (int_xcore_getet))]>;
}
-def SSYNC_0r : _F0R<(outs), (ins),
+def SSYNC_0r : _F0R<0b0000001110, (outs), (ins),
"ssync",
[(int_xcore_ssync)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
-def WAITEU_0R : _F0R<(outs), (ins),
- "waiteu",
- [(brind (int_xcore_waitevent))]>;
+def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
new file mode 100644
index 0000000000..f96eda9fcb
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower XCore MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+#include "XCoreMCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter)
+: Printer(asmprinter) {}
+
+void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) {
+ Mang = M;
+ Ctx = C;
+}
+
+MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const MCSymbol *Symbol;
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = Printer.GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = Printer.GetCPISymbol(MO.getIndex());
+ Offset += MO.getOffset();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Assume offset is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
+}
+
+MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
+ unsigned offset) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm() + offset);
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+
+ return MCOperand();
+}
+
+void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
new file mode 100644
index 0000000000..28e702bb98
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -0,0 +1,42 @@
+//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCINSTLOWER_H
+#define XCOREMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class AsmPrinter;
+
+/// \brief This class is used to lower an MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext *Ctx;
+ Mangler *Mang;
+ AsmPrinter &Printer;
+public:
+ XCoreMCInstLower(class AsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext *C);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+};
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index d8517d7b4e..e637d9a5be 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -22,7 +22,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -30,7 +31,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Type.h"
#define GET_REGINFO_TARGET_DESC
#include "XCoreGenRegisterInfo.inc"
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 9edfda1f50..4c771e9700 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -45,10 +45,10 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
def GRRegs : RegisterClass<"XCore", [i32], 32,
// Return values and arguments
(add R0, R1, R2, R3,
- // Not preserved across procedure calls
- R11,
// Callee save
- R4, R5, R6, R7, R8, R9, R10)>;
+ R4, R5, R6, R7, R8, R9, R10,
+ // Not preserved across procedure calls
+ R11)>;
// Reserved
def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index b130fb9daa..28c3d12c05 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -13,7 +13,7 @@
#include "XCoreTargetMachine.h"
#include "XCore.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 42bfcb4a03..eb9a1aa420 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -19,9 +19,8 @@
#include "XCoreInstrInfo.h"
#include "XCoreSelectionDAGInfo.h"
#include "XCoreSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -56,12 +53,6 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration