author    Alexander Kornienko <alexfh@google.com>  2013-04-03 14:07:16 +0000
committer Alexander Kornienko <alexfh@google.com>  2013-04-03 14:07:16 +0000
commit    e133bc868944822bf8961f825d3aa63d6fa48fb7 (patch)
tree      ebbd4a8040181471467a9737d90d94dc6b58b316 /lib
parent    647735c781c5b37061ee03d6e9e6c7dda92218e2 (diff)
parent    080e3c523e87ec68ca1ea5db4cd49816028dd8bd (diff)
Updating branches/google/stable to r178511 (stable)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@178655 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Analysis/AliasAnalysisEvaluator.cpp | 72
-rw-r--r-- lib/Analysis/BasicAliasAnalysis.cpp | 10
-rw-r--r-- lib/Analysis/MemoryDependenceAnalysis.cpp | 276
-rw-r--r-- lib/Analysis/PathProfileVerifier.cpp | 4
-rw-r--r-- lib/Analysis/ProfileInfo.cpp | 8
-rw-r--r-- lib/Analysis/ScalarEvolution.cpp | 30
-rw-r--r-- lib/Analysis/TargetTransformInfo.cpp | 4
-rw-r--r-- lib/Analysis/ValueTracking.cpp | 2
-rw-r--r-- lib/Bitcode/Reader/BitstreamReader.cpp | 2
-rw-r--r-- lib/Bitcode/Writer/ValueEnumerator.cpp | 2
-rw-r--r-- lib/CMakeLists.txt | 1
-rw-r--r-- lib/CodeGen/AsmPrinter/CMakeLists.txt | 1
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.cpp | 17
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.h | 18
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 74
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 4
-rw-r--r-- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 120
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 2
-rw-r--r-- lib/CodeGen/CodeGen.cpp | 1
-rw-r--r-- lib/CodeGen/CodePlacementOpt.cpp | 423
-rw-r--r-- lib/CodeGen/ErlangGC.cpp | 81
-rw-r--r-- lib/CodeGen/LiveRangeEdit.cpp | 2
-rw-r--r-- lib/CodeGen/MachineBlockPlacement.cpp | 2
-rw-r--r-- lib/CodeGen/MachineFunction.cpp | 48
-rw-r--r-- lib/CodeGen/MachineRegisterInfo.cpp | 1
-rw-r--r-- lib/CodeGen/MachineScheduler.cpp | 2
-rw-r--r-- lib/CodeGen/Passes.cpp | 21
-rw-r--r-- lib/CodeGen/PrologEpilogInserter.cpp | 75
-rw-r--r-- lib/CodeGen/RegisterScavenging.cpp | 51
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 195
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 29
-rw-r--r-- lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 6
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 7
-rw-r--r-- lib/CodeGen/SpillPlacement.cpp | 1
-rw-r--r-- lib/CodeGen/StackColoring.cpp | 18
-rw-r--r-- lib/CodeGen/TargetLoweringBase.cpp | 1
-rw-r--r-- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 7
-rw-r--r-- lib/CodeGen/TargetSchedule.cpp | 5
-rw-r--r-- lib/DebugInfo/DWARFDebugArangeSet.cpp | 32
-rw-r--r-- lib/DebugInfo/DWARFDebugArangeSet.h | 8
-rw-r--r-- lib/DebugInfo/DWARFDebugAranges.cpp | 2
-rw-r--r-- lib/ExecutionEngine/ExecutionEngine.cpp | 151
-rw-r--r-- lib/ExecutionEngine/Interpreter/Execution.cpp | 33
-rw-r--r-- lib/ExecutionEngine/Interpreter/Interpreter.h | 1
-rw-r--r-- lib/IR/Attributes.cpp | 7
-rw-r--r-- lib/IR/Constants.cpp | 10
-rw-r--r-- lib/IR/DIBuilder.cpp | 144
-rw-r--r-- lib/IR/DataLayout.cpp | 54
-rw-r--r-- lib/IR/DebugInfo.cpp | 127
-rw-r--r-- lib/IR/Function.cpp | 4
-rw-r--r-- lib/IR/Instructions.cpp | 12
-rw-r--r-- lib/IRReader/CMakeLists.txt | 3
-rw-r--r-- lib/IRReader/IRReader.cpp | 78
-rw-r--r-- lib/IRReader/LLVMBuild.txt | 22
-rw-r--r-- lib/IRReader/Makefile | 14
-rw-r--r-- lib/LLVMBuild.txt | 2
-rw-r--r-- lib/Linker/LinkModules.cpp | 2
-rw-r--r-- lib/Linker/Linker.cpp | 21
-rw-r--r-- lib/MC/MCContext.cpp | 6
-rw-r--r-- lib/MC/MCDisassembler/Disassembler.cpp | 24
-rw-r--r-- lib/MC/MCDwarf.cpp | 8
-rw-r--r-- lib/MC/MCExpr.cpp | 2
-rw-r--r-- lib/MC/MCObjectFileInfo.cpp | 13
-rw-r--r-- lib/MC/MCParser/AsmParser.cpp | 49
-rw-r--r-- lib/MC/MCStreamer.cpp | 10
-rw-r--r-- lib/Makefile | 3
-rw-r--r-- lib/Support/APFloat.cpp | 6
-rw-r--r-- lib/Support/APInt.cpp | 8
-rw-r--r-- lib/Support/CMakeLists.txt | 3
-rw-r--r-- lib/Support/ErrorHandling.cpp | 16
-rw-r--r-- lib/Support/FileOutputBuffer.cpp | 4
-rw-r--r-- lib/Support/MemoryBuffer.cpp | 80
-rw-r--r-- lib/Support/PathV2.cpp | 24
-rw-r--r-- lib/Support/PrettyStackTrace.cpp | 6
-rw-r--r-- lib/Support/Program.cpp | 11
-rw-r--r-- lib/Support/SmallPtrSet.cpp | 24
-rw-r--r-- lib/Support/Unix/Memory.inc | 9
-rw-r--r-- lib/Support/Unix/PathV2.inc | 17
-rw-r--r-- lib/Support/Unix/Watchdog.inc | 32
-rw-r--r-- lib/Support/Watchdog.cpp | 23
-rw-r--r-- lib/Support/Windows/PathV2.inc | 61
-rw-r--r-- lib/Support/Windows/Watchdog.inc | 24
-rw-r--r-- lib/Support/raw_ostream.cpp | 9
-rw-r--r-- lib/TableGen/Error.cpp | 6
-rw-r--r-- lib/TableGen/Main.cpp | 7
-rw-r--r-- lib/TableGen/TGParser.cpp | 40
-rw-r--r-- lib/Target/AArch64/AArch64FrameLowering.cpp | 57
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.cpp | 6
-rw-r--r-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 83
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 72
-rw-r--r-- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 74
-rw-r--r-- lib/Target/ARM/A15SDOptimizer.cpp | 704
-rw-r--r-- lib/Target/ARM/ARM.h | 1
-rw-r--r-- lib/Target/ARM/ARM.td | 10
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 14
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp | 59
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 57
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 45
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMSchedule.td | 64
-rw-r--r-- lib/Target/ARM/ARMScheduleA9.td | 47
-rw-r--r-- lib/Target/ARM/ARMScheduleSwift.td | 23
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp | 11
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 9
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 13
-rw-r--r-- lib/Target/ARM/ARMTargetTransformInfo.cpp | 94
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 250
-rw-r--r-- lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6
-rw-r--r-- lib/Target/ARM/README-Thumb.txt | 2
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.cpp | 2
-rw-r--r-- lib/Target/CppBackend/CPPBackend.cpp | 1
-rw-r--r-- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 3
-rw-r--r-- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 40
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 149
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.h | 7
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.td | 525
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfoV4.td | 1741
-rw-r--r-- lib/Target/Hexagon/HexagonMCInst.h | 41
-rw-r--r-- lib/Target/Hexagon/HexagonNewValueJump.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 66
-rw-r--r-- lib/Target/Hexagon/HexagonSubtarget.cpp | 17
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.cpp | 49
-rw-r--r-- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 177
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1
-rw-r--r-- lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 50
-rw-r--r-- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/MBlaze/MBlazeRegisterInfo.h | 3
-rw-r--r-- lib/Target/MSP430/MSP430FrameLowering.cpp | 4
-rw-r--r-- lib/Target/MSP430/MSP430FrameLowering.h | 3
-rw-r--r-- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 307
-rw-r--r-- lib/Target/Mips/CMakeLists.txt | 4
-rw-r--r-- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 18
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 10
-rw-r--r-- lib/Target/Mips/Mips16FrameLowering.h | 2
-rw-r--r-- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308
-rw-r--r-- lib/Target/Mips/Mips16ISelDAGToDAG.h | 51
-rw-r--r-- lib/Target/Mips/Mips16ISelLowering.cpp | 689
-rw-r--r-- lib/Target/Mips/Mips16ISelLowering.h | 80
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.cpp | 19
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.h | 24
-rw-r--r-- lib/Target/Mips/Mips16InstrInfo.td | 6
-rw-r--r-- lib/Target/Mips/Mips16RegisterInfo.cpp | 6
-rw-r--r-- lib/Target/Mips/Mips16RegisterInfo.h | 2
-rw-r--r-- lib/Target/Mips/Mips64InstrInfo.td | 34
-rw-r--r-- lib/Target/Mips/MipsDSPInstrInfo.td | 274
-rw-r--r-- lib/Target/Mips/MipsDelaySlotFiller.cpp | 6
-rw-r--r-- lib/Target/Mips/MipsFrameLowering.h | 5
-rw-r--r-- lib/Target/Mips/MipsISelDAGToDAG.cpp | 670
-rw-r--r-- lib/Target/Mips/MipsISelDAGToDAG.h | 93
-rw-r--r-- lib/Target/Mips/MipsISelLowering.cpp | 1309
-rw-r--r-- lib/Target/Mips/MipsISelLowering.h | 87
-rw-r--r-- lib/Target/Mips/MipsInstrFPU.td | 27
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.h | 30
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.td | 180
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.h | 6
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.td | 54
-rw-r--r-- lib/Target/Mips/MipsSEFrameLowering.cpp | 168
-rw-r--r-- lib/Target/Mips/MipsSEFrameLowering.h | 2
-rw-r--r-- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 473
-rw-r--r-- lib/Target/Mips/MipsSEISelDAGToDAG.h | 57
-rw-r--r-- lib/Target/Mips/MipsSEISelLowering.cpp | 442
-rw-r--r-- lib/Target/Mips/MipsSEISelLowering.h | 62
-rw-r--r-- lib/Target/Mips/MipsSEInstrInfo.cpp | 38
-rw-r--r-- lib/Target/Mips/MipsSEInstrInfo.h | 24
-rw-r--r-- lib/Target/Mips/MipsSERegisterInfo.cpp | 9
-rw-r--r-- lib/Target/Mips/MipsSERegisterInfo.h | 2
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r-- lib/Target/NVPTX/CMakeLists.txt | 1
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 38
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 7
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 11
-rw-r--r-- lib/Target/NVPTX/ManagedStringPool.h | 1
-rw-r--r-- lib/Target/NVPTX/NVPTX.h | 27
-rw-r--r-- lib/Target/NVPTX/NVPTX.td | 12
-rw-r--r-- lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 16
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 891
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.h | 107
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 49
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.h | 8
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1447
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 8
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.cpp | 869
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.h | 26
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 101
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.h | 31
-rw-r--r-- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 53
-rw-r--r-- lib/Target/NVPTX/NVPTXNumRegisters.h | 6
-rw-r--r-- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 81
-rw-r--r-- lib/Target/NVPTX/NVPTXRegisterInfo.h | 23
-rw-r--r-- lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 8
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.cpp | 22
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.h | 11
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 64
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.h | 41
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 77
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.cpp | 108
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.h | 10
-rw-r--r-- lib/Target/NVPTX/NVPTXutil.cpp | 32
-rw-r--r-- lib/Target/NVPTX/NVVMReflect.cpp | 193
-rw-r--r-- lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp | 4
-rw-r--r-- lib/Target/NVPTX/cl_common_defines.h | 123
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 30
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 12
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h | 70
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 33
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 18
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 48
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 2
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 5
-rw-r--r-- lib/Target/PowerPC/PPC.h | 1
-rw-r--r-- lib/Target/PowerPC/PPC.td | 51
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 45
-rw-r--r-- lib/Target/PowerPC/PPCCTRLoops.cpp | 81
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.td | 6
-rw-r--r-- lib/Target/PowerPC/PPCCodeEmitter.cpp | 4
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp | 159
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.h | 12
-rw-r--r-- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 31
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 716
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h | 88
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td | 683
-rw-r--r-- lib/Target/PowerPC/PPCInstrAltivec.td | 578
-rw-r--r-- lib/Target/PowerPC/PPCInstrFormats.td | 22
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 258
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td | 884
-rw-r--r-- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 22
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 301
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.h | 40
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.td | 32
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.cpp | 5
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.h | 10
-rw-r--r-- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 5
-rw-r--r-- lib/Target/PowerPC/README.txt | 1
-rw-r--r-- lib/Target/R600/AMDGPU.h | 2
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.cpp | 2
-rw-r--r-- lib/Target/R600/AMDGPUInstructions.td | 4
-rw-r--r-- lib/Target/R600/AMDGPUMachineFunction.cpp | 22
-rw-r--r-- lib/Target/R600/AMDGPUMachineFunction.h | 29
-rw-r--r-- lib/Target/R600/AMDGPUStructurizeCFG.cpp | 6
-rw-r--r-- lib/Target/R600/AMDGPUTargetMachine.cpp | 2
-rw-r--r-- lib/Target/R600/AMDILISelDAGToDAG.cpp | 33
-rw-r--r-- lib/Target/R600/CMakeLists.txt | 3
-rw-r--r-- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 2
-rw-r--r-- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 201
-rw-r--r-- lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 4
-rw-r--r-- lib/Target/R600/R600ControlFlowFinalizer.cpp | 264
-rw-r--r-- lib/Target/R600/R600EmitClauseMarkers.cpp | 253
-rw-r--r-- lib/Target/R600/R600ISelLowering.cpp | 12
-rw-r--r-- lib/Target/R600/R600ISelLowering.h | 1
-rw-r--r-- lib/Target/R600/R600InstrInfo.cpp | 54
-rw-r--r-- lib/Target/R600/R600InstrInfo.h | 3
-rw-r--r-- lib/Target/R600/R600Instructions.td | 330
-rw-r--r-- lib/Target/R600/R600MachineFunctionInfo.cpp | 6
-rw-r--r-- lib/Target/R600/R600MachineFunctionInfo.h | 6
-rw-r--r-- lib/Target/R600/R600MachineScheduler.cpp | 75
-rw-r--r-- lib/Target/R600/R600MachineScheduler.h | 3
-rw-r--r-- lib/Target/R600/R600RegisterInfo.td | 63
-rw-r--r-- lib/Target/R600/SIISelLowering.cpp | 116
-rw-r--r-- lib/Target/R600/SIISelLowering.h | 4
-rw-r--r-- lib/Target/R600/SIInsertWaits.cpp | 16
-rw-r--r-- lib/Target/R600/SIInstrInfo.cpp | 42
-rw-r--r-- lib/Target/R600/SIInstrInfo.h | 4
-rw-r--r-- lib/Target/R600/SIInstrInfo.td | 55
-rw-r--r-- lib/Target/R600/SIInstructions.td | 310
-rw-r--r-- lib/Target/R600/SIIntrinsics.td | 12
-rw-r--r-- lib/Target/R600/SILowerControlFlow.cpp | 134
-rw-r--r-- lib/Target/R600/SIMachineFunctionInfo.cpp | 20
-rw-r--r-- lib/Target/R600/SIMachineFunctionInfo.h | 7
-rw-r--r-- lib/Target/R600/SIRegisterInfo.cpp | 5
-rw-r--r-- lib/Target/R600/SIRegisterInfo.h | 3
-rw-r--r-- lib/Target/R600/SIRegisterInfo.td | 14
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.td | 169
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.h | 3
-rw-r--r-- lib/Target/TargetLibraryInfo.cpp | 3
-rw-r--r-- lib/Target/TargetMachine.cpp | 26
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 517
-rw-r--r-- lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 347
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 23
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 45
-rw-r--r-- lib/Target/X86/X86.td | 89
-rw-r--r-- lib/Target/X86/X86AsmPrinter.cpp | 2
-rw-r--r-- lib/Target/X86/X86CodeEmitter.cpp | 24
-rw-r--r-- lib/Target/X86/X86FastISel.cpp | 13
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 4
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 515
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 7
-rw-r--r-- lib/Target/X86/X86Instr3DNow.td | 13
-rw-r--r-- lib/Target/X86/X86InstrArithmetic.td | 191
-rw-r--r-- lib/Target/X86/X86InstrCMovSetCC.td | 9
-rw-r--r-- lib/Target/X86/X86InstrCompiler.td | 50
-rw-r--r-- lib/Target/X86/X86InstrControl.td | 72
-rw-r--r-- lib/Target/X86/X86InstrExtension.td | 73
-rw-r--r-- lib/Target/X86/X86InstrFPStack.td | 26
-rw-r--r-- lib/Target/X86/X86InstrFormats.td | 161
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 9
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 182
-rw-r--r-- lib/Target/X86/X86InstrMMX.td | 80
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 774
-rw-r--r-- lib/Target/X86/X86InstrShiftRotate.td | 54
-rw-r--r-- lib/Target/X86/X86InstrSystem.td | 24
-rw-r--r-- lib/Target/X86/X86InstrTSX.td | 7
-rw-r--r-- lib/Target/X86/X86MCInstLower.cpp | 51
-rw-r--r-- lib/Target/X86/X86SchedHaswell.td | 126
-rw-r--r-- lib/Target/X86/X86SchedSandyBridge.td | 122
-rw-r--r-- lib/Target/X86/X86Schedule.td | 89
-rw-r--r-- lib/Target/X86/X86ScheduleAtom.td | 1
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 20
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 17
-rw-r--r-- lib/Target/X86/X86TargetTransformInfo.cpp | 58
-rw-r--r-- lib/Target/XCore/XCoreFrameLowering.cpp | 2
-rw-r--r-- lib/Transforms/IPO/FunctionAttrs.cpp | 741
-rw-r--r-- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 149
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCasts.cpp | 10
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCompares.cpp | 108
-rw-r--r-- lib/Transforms/InstCombine/InstCombineSelect.cpp | 7
-rw-r--r-- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 76
-rw-r--r-- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 216
-rw-r--r-- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 15
-rw-r--r-- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 62
-rw-r--r-- lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 1
-rw-r--r-- lib/Transforms/ObjCARC/ObjCARC.h | 18
-rw-r--r-- lib/Transforms/ObjCARC/ObjCARCContract.cpp | 8
-rw-r--r-- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 461
-rw-r--r-- lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 13
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp | 10
-rw-r--r-- lib/Transforms/Scalar/GlobalMerge.cpp | 82
-rw-r--r-- lib/Transforms/Scalar/IndVarSimplify.cpp | 39
-rw-r--r-- lib/Transforms/Scalar/LoopDeletion.cpp | 54
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 17
-rw-r--r-- lib/Transforms/Scalar/Reassociate.cpp | 326
-rw-r--r-- lib/Transforms/Scalar/SROA.cpp | 688
-rw-r--r-- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 703
-rw-r--r-- lib/Transforms/Utils/InlineFunction.cpp | 42
-rw-r--r-- lib/Transforms/Utils/Local.cpp | 19
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp | 2
347 files changed, 19128 insertions(+), 12718 deletions(-)
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index e58dde3d93..a571463dfe 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -44,6 +44,8 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+
namespace {
class AAEval : public FunctionPass {
unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
@@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
}
}
+static inline void
+PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *V1
+ << " <-> " << *V2 << '\n';
+ }
+}
+
static inline bool isInterestingPointer(Value *V) {
return V->getType()->isPointerTy()
&& !isa<ConstantPointerNull>(V);
@@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) {
SetVector<Value *> Pointers;
SetVector<CallSite> CallSites;
+ SetVector<Value *> Loads;
+ SetVector<Value *> Stores;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
if (I->getType()->isPointerTy()) // Add all pointer arguments.
@@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
+ if (EvalTBAA && isa<LoadInst>(&*I))
+ Loads.insert(&*I);
+ if (EvalTBAA && isa<StoreInst>(&*I))
+ Stores.insert(&*I);
Instruction &Inst = *I;
if (CallSite CS = cast<Value>(&Inst)) {
Value *Callee = CS.getCalledValue();
@@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) {
}
}
+ if (EvalTBAA) {
+ // iterate over all pairs of load, store
+ for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
+ I2 != E2; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+
+ // iterate over all pairs of store, store
+ for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+ }
+
// Mod/ref alias analysis: compare all pairs of calls and values
for (SetVector<CallSite>::iterator C = CallSites.begin(),
Ce = CallSites.end(); C != Ce; ++C) {
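
The new -evaluate-tbaa mode above reduces to exhaustive pairwise alias queries: every (load, store) pair, then every unordered (store, store) pair. A minimal, self-contained C++ sketch of that pair enumeration follows; the stubbed alias() is a hypothetical stand-in for the real AA.alias(Location, Location) query, not the LLVM API.

#include <cstdio>
#include <vector>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

// Hypothetical stand-in for AA.alias(LocA, LocB); always inconclusive here.
static AliasResult alias(int, int) { return MayAlias; }

int main() {
  std::vector<int> Loads = {1, 2}, Stores = {3, 4, 5};
  unsigned Counts[4] = {};
  // Every (load, store) pair, as in the first loop of the hunk.
  for (int L : Loads)
    for (int S : Stores)
      ++Counts[alias(L, S)];
  // Every unordered (store, store) pair: the inner loop stops at the outer
  // iterator (the hunk's 'I2 != I1'), so each pair is visited exactly once.
  for (size_t I1 = 0; I1 != Stores.size(); ++I1)
    for (size_t I2 = 0; I2 != I1; ++I2)
      ++Counts[alias(Stores[I1], Stores[I2])];
  std::printf("no=%u may=%u partial=%u must=%u\n", Counts[NoAlias],
              Counts[MayAlias], Counts[PartialAlias], Counts[MustAlias]);
}
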
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4139336f26..ae6da1af0c 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
- if (V1Size == V2Size) {
+ if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
V1TBAAInfo, UnderlyingV2,
@@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
- // Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
- UnderlyingV2, UnknownSize, 0);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
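
The net effect of this hunk is a reordering: the coarse base-pointer query (UnknownSize, no TBAA) is hoisted above the precise equal-size check, and the precise check now runs only when the coarse answer is MayAlias. A hypothetical restatement of that control flow, with both queries stubbed to illustrative values rather than the real analysis:

#include <cstdio>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

static AliasResult coarseBaseAlias() { return MayAlias; }  // UnknownSize query
static AliasResult preciseBaseAlias() { return NoAlias; }  // size+TBAA query

static AliasResult aliasGEPSketch(bool SizesMatch) {
  // Coarse query first, as hoisted by the patch.
  AliasResult BaseAlias = coarseBaseAlias();
  // The precise equal-size check is attempted only on an inconclusive
  // coarse answer; a definite No/Must result stands on its own.
  if (BaseAlias == MayAlias && SizesMatch && preciseBaseAlias() == NoAlias)
    return NoAlias;
  return BaseAlias; // "No or May ... return it immediately"
}

int main() {
  std::printf("%d %d\n", aliasGEPSketch(true), aliasGEPSketch(false));
}
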
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 1faa04623e..2240e9de33 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
-// operation, what preceding memory operations it depends on. It builds on
+// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
@@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
static const int BlockScanLimit = 500;
char MemoryDependenceAnalysis::ID = 0;
-
+
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) {
/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
-static void RemoveFromReverseMap(DenseMap<Instruction*,
+static void RemoveFromReverseMap(DenseMap<Instruction*,
SmallPtrSet<KeyTy, 4> > &ReverseMap,
Instruction *Inst, KeyTy Val) {
typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
@@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (LI->isUnordered()) {
Loc = AA->getLocation(LI);
return AliasAnalysis::Ref;
- } else if (LI->getOrdering() == Monotonic) {
+ }
+ if (LI->getOrdering() == Monotonic) {
Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
@@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (SI->isUnordered()) {
Loc = AA->getLocation(SI);
return AliasAnalysis::Mod;
- } else if (SI->getOrdering() == Monotonic) {
+ }
+ if (SI->getOrdering() == Monotonic) {
Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
@@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// Walk backwards through the block, looking for dependencies
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
- // running time on extreme testcases.
+ // running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
Instruction *Inst = --ScanIt;
-
+
// If this inst is a memory op, get the pointer it accessed
AliasAnalysis::Location Loc;
AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
@@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool
+static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
@@ -289,25 +291,25 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
if (LI->getParent()->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
return 0;
-
+
// Get the base of this load.
int64_t LIOffs = 0;
- const Value *LIBase =
+ const Value *LIBase =
GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
-
+
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
if (LIBase != MemLocBase) return 0;
-
+
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// and "P+3". Check to see if increasing the size of the "LI" load up to its
// alignment (or the largest native integer type) will allow us to load all
// the bits required by MemLoc.
-
+
// If MemLoc is before LI, then no widening of LI will help us out.
if (MemLocOffs < LIOffs) return 0;
-
+
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
@@ -316,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
unsigned LoadAlign = LI->getAlignment();
int64_t MemLocEnd = MemLocOffs+MemLocSize;
-
+
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
if (LIOffs+LoadAlign < MemLocEnd) return 0;
-
+
// This is the size of the load to try. Start with the next larger power of
// two.
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
-
+
while (1) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
@@ -343,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
-
+
NewLoadByteSize <<= 1;
}
}
@@ -355,7 +357,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
/// instruction as well; this function may take advantage of the metadata
/// annotated to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB,
Instruction *QueryInst) {
@@ -382,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Debug intrinsics don't (and can't) cause dependences.
if (isa<DbgInfoIntrinsic>(II)) continue;
-
+
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
@@ -406,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
-
+
if (isLoad) {
if (R == AliasAnalysis::NoAlias) {
// If this is an over-aligned integer load (for example,
@@ -423,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
return MemDepResult::getClobber(Inst);
-
+
continue;
}
-
+
// Must aliased loads are defs of each other.
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
@@ -441,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
#endif
-
+
// Random may-alias loads don't depend on each other without a
// dependence.
continue;
@@ -458,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Stores depend on may/must aliased loads.
return MemDepResult::getDef(Inst);
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// FIXME: This is overly conservative.
@@ -474,10 +476,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
-
+
if (R == AliasAnalysis::NoAlias)
continue;
if (R == AliasAnalysis::MustAlias)
@@ -498,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
-
+
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
// Be conservative if the accessed pointer may alias the allocation.
@@ -532,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(Inst);
}
}
-
+
// No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
@@ -544,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
/// depends.
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
Instruction *ScanPos = QueryInst;
-
+
// Check for a cached result
MemDepResult &LocalCache = LocalDeps[QueryInst];
-
+
// If the cached entry is non-dirty, just return it. Note that this depends
// on MemDepResult's default constructing to 'dirty'.
if (!LocalCache.isDirty())
return LocalCache;
-
+
// Otherwise, if we have a dirty entry, we know we can start the scan at that
// instruction, which may save us some work.
if (Instruction *Inst = LocalCache.getInst()) {
ScanPos = Inst;
-
+
RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
}
-
+
BasicBlock *QueryParent = QueryInst->getParent();
-
+
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
// No dependence found. If this is the entry block of the function, it is
@@ -591,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
}
-
+
// Remember the result!
if (Instruction *I = LocalCache.getInst())
ReverseLocalDeps[I].insert(QueryInst);
-
+
return LocalCache;
}
@@ -636,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// the uncached case, this starts out as the set of predecessors we care
/// about.
SmallVector<BasicBlock*, 32> DirtyBlocks;
-
+
if (!Cache.empty()) {
// Okay, we have a cache entry. If we know it is not dirty, just return it
// with no computation.
@@ -644,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
++NumCacheNonLocal;
return Cache;
}
-
+
// If we already have a partially computed set of results, scan them to
// determine what is dirty, seeding our initial DirtyBlocks worklist.
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
I != E; ++I)
if (I->getResult().isDirty())
DirtyBlocks.push_back(I->getBB());
-
+
// Sort the cache so that we can do fast binary search lookups below.
std::sort(Cache.begin(), Cache.end());
-
+
++NumCacheDirtyNonLocal;
//cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
@@ -665,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(*PI);
++NumUncacheNonLocal;
}
-
+
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
SmallPtrSet<BasicBlock*, 64> Visited;
-
+
unsigned NumSortedEntries = Cache.size();
DEBUG(AssertSorted(Cache));
-
+
// Iterate while we still have blocks to update.
while (!DirtyBlocks.empty()) {
BasicBlock *DirtyBB = DirtyBlocks.back();
DirtyBlocks.pop_back();
-
+
// Already processed this block?
if (!Visited.insert(DirtyBB))
continue;
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
DEBUG(AssertSorted(Cache, NumSortedEntries));
- NonLocalDepInfo::iterator Entry =
+ NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
NonLocalDepEntry(DirtyBB));
if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
- if (Entry != Cache.begin()+NumSortedEntries &&
+ if (Entry != Cache.begin()+NumSortedEntries &&
Entry->getBB() == DirtyBB) {
// If we already have an entry, and if it isn't already dirty, the block
// is done.
if (!Entry->getResult().isDirty())
continue;
-
+
// Otherwise, remember this slot so we can update the value.
ExistingResult = &*Entry;
}
-
+
// If the dirty entry has a pointer, start scanning from it so we don't have
// to rescan the entire block.
BasicBlock::iterator ScanPos = DirtyBB->end();
@@ -715,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
QueryCS.getInstruction());
}
}
-
+
// Find out if this block has a local dependency for QueryInst.
MemDepResult Dep;
-
+
if (ScanPos != DirtyBB->begin()) {
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
@@ -728,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
} else {
Dep = MemDepResult::getNonFuncLocal();
}
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the association!
if (!Dep.isNonLocal()) {
@@ -744,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = Dep.getInst())
ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
} else {
-
+
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
DirtyBlocks.push_back(*PI);
}
}
-
+
return Cache;
}
@@ -769,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
-
+
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
@@ -794,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
NonLocalDepInfo::iterator Entry =
@@ -802,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
NonLocalDepEntry(BB));
if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
ExistingResult = &*Entry;
-
+
// If we have a cached entry, and it is non-dirty, use it as the value for
// this dependency.
if (ExistingResult && !ExistingResult->getResult().isDirty()) {
++NumCacheNonLocalPtr;
return ExistingResult->getResult();
- }
-
+ }
+
// Otherwise, we have to scan for the value. If we have a dirty cache
// entry, start scanning from its position, otherwise we scan from the end
// of the block.
@@ -823,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
ScanPos = ExistingResult->getResult().getInst();
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
-
+
// Scan the block for the dependency.
MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache->push_back(NonLocalDepEntry(BB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
if (!Dep.isDef() && !Dep.isClobber())
return Dep;
-
+
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
// update MemDep when we remove instructions.
Instruction *Inst = Dep.getInst();
@@ -859,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
-static void
+static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
switch (Cache.size() - NumSortedEntries) {
@@ -911,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
bool SkipFirstBlock) {
-
+
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -925,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
- std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
NonLocalPointerInfo *CacheInfo = &Pair.first->second;
@@ -987,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
if (VI == Visited.end() || VI->second == Pointer.getAddr())
continue;
-
+
// We have a pointer mismatch in a block. Just return clobber, saying
// that something was clobbered in this result. We could also do a
// non-fully cached query, but there is little point in doing this.
return true;
}
}
-
+
Value *Addr = Pointer.getAddr();
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
@@ -1005,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
++NumCacheCompleteNonLocalPtr;
return false;
}
-
+
// Otherwise, either this is a new block, a block with an invalid cache
// pointer or one that we're about to invalidate by putting more info into it
// than its valid cache info. If empty, the result will be valid cache info,
@@ -1014,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
else
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
-
+
// PredList used inside loop.
SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
@@ -1028,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// revisit blocks after we insert info for them.
unsigned NumSortedEntries = Cache->size();
DEBUG(AssertSorted(*Cache));
-
+
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
-
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1043,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DEBUG(AssertSorted(*Cache, NumSortedEntries));
MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
NumSortedEntries);
-
+
// If we got a Def or Clobber, add this to the list of results.
if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
continue;
}
}
-
+
// If 'Pointer' is an instruction defined in this block, then we need to do
// phi translation to change it into a value live in the predecessor block.
// If not, we just add the predecessors to the worklist and scan them with
@@ -1067,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NewBlocks.push_back(*PI);
continue;
}
-
+
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
@@ -1082,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
-
+
// We do need to do phi translation, if we know ahead of time we can't phi
// translate this value, don't even try.
if (!Pointer.IsPotentiallyPHITranslatable())
goto PredTranslationFailure;
-
+
// We may have added values to the cache list before this PHI translation.
// If so, we haven't done anything to ensure that the cache remains sorted.
// Sort it now (if needed) so that recursive invocations of
@@ -1110,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
-
+
// Check to see if we have already visited this pred block with another
// pointer. If so, we can't do this lookup. This failure can occur
// with PHI translation when a critical edge exists and the PHI node in
@@ -1127,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
continue;
-
+
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
- for (unsigned i = 0; i < PredList.size(); i++)
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i)
Visited.erase(PredList[i].first);
goto PredTranslationFailure;
@@ -1143,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Actually process results here; this need to be a separate loop to avoid
// calling getNonLocalPointerDepFromBB for blocks we don't want to return
- // any results for. (getNonLocalPointerDepFromBB will modify our
+ // any results for. (getNonLocalPointerDepFromBB will modify our
// datastructures in ways the code after the PredTranslationFailure label
// doesn't expect.)
- for (unsigned i = 0; i < PredList.size(); i++) {
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
BasicBlock *Pred = PredList[i].first;
PHITransAddr &PredPointer = PredList[i].second;
Value *PredPtrVal = PredPointer.getAddr();
@@ -1186,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
continue;
}
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
-
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
@@ -1204,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// The following code is "failure"; we can't produce a sane translation
// for the given block. It assumes that we haven't modified any of
// our datastructures while processing the current block.
-
+
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
}
-
+
// Since we failed phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
// If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
@@ -1225,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// we have to bail out.
if (SkipFirstBlock)
return true;
-
+
for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
assert(I != Cache->rend() && "Didn't find current block??");
if (I->getBB() != BB)
continue;
-
+
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
@@ -1250,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
/// CachedNonLocalPointerInfo, remove it.
void MemoryDependenceAnalysis::
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
- CachedNonLocalPointerInfo::iterator It =
+ CachedNonLocalPointerInfo::iterator It =
NonLocalPointerDeps.find(P);
if (It == NonLocalPointerDeps.end()) return;
-
+
// Remove all of the entries in the BB->val map. This involves removing
// instructions from the reverse map.
NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
-
+
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
Instruction *Target = PInfo[i].getResult().getInst();
if (Target == 0) continue; // Ignore non-local dep results.
assert(Target->getParent() == PInfo[i].getBB());
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
}
-
+
// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
NonLocalPointerDeps.erase(It);
}
@@ -1321,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Remove this local dependency info.
LocalDeps.erase(LocalDepEntry);
}
-
+
// If we have any cached pointer dependencies on this instruction, remove
// them. If the instruction has non-pointer type, then it can't be a pointer
// base.
-
+
// Remove it from both the load info and the store info. The instruction
// can't be in either of these maps if it is non-pointer.
if (RemInst->getType()->isPointerTy()) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
}
-
+
// Loop over all of the things that depend on the instruction we're removing.
- //
+ //
SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
// If we find RemInst as a clobber or Def in any of the maps for other values,
@@ -1346,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
MemDepResult NewDirtyVal;
if (!RemInst->isTerminator())
NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
-
+
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
-
+
for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
E = ReverseDeps.end(); I != E; ++I) {
Instruction *InstDependingOnRemInst = *I;
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
-
+
LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
-
+
// Make sure to remember that new things depend on NewDepInst.
assert(NewDirtyVal.getInst() && "There is no way something else can have "
"a local dep on this if it is a terminator!");
- ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
InstDependingOnRemInst));
}
-
+
ReverseLocalDeps.erase(ReverseDepIt);
// Add new reverse deps after scanning the set, to avoid invalidating the
@@ -1379,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
I != E; ++I) {
assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
-
+
PerInstNLInfo &INLD = NonLocalDeps[*I];
// The information is now dirty!
INLD.second = true;
-
- for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
DE = INLD.first.end(); DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NextI = NewDirtyVal.getInst())
ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
}
@@ -1412,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
// If the instruction is in ReverseNonLocalPtrDeps then it appears as a
// value in the NonLocalPointerDeps info.
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
@@ -1420,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
E = Set.end(); I != E; ++I) {
ValueIsLoadPair P = *I;
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
-
+
NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
-
+
// The cache is not valid for any specific block anymore.
NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
-
+
// Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
}
-
+
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
std::sort(NLPDI.begin(), NLPDI.end());
}
-
+
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
-
+
while (!ReversePtrDepsToAdd.empty()) {
ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
.insert(ReversePtrDepsToAdd.back().second);
ReversePtrDepsToAdd.pop_back();
}
}
-
-
+
+
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
AA->deleteValue(RemInst);
DEBUG(verifyRemoved(RemInst));
@@ -1472,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
assert(I->second.getInst() != D &&
"Inst occurs in data structures");
}
-
+
for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
E = NonLocalPointerDeps.end(); I != E; ++I) {
assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
@@ -1481,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
II != E; ++II)
assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
}
-
+
for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
E = NonLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1490,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = INLD.first.end(); II != EE; ++II)
assert(II->getResult().getInst() != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1498,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
@@ -1507,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseNonLocalPtrDepTy::const_iterator
I = ReverseNonLocalPtrDeps.begin(),
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
E = I->second.end(); II != E; ++II)
assert(*II != ValueIsLoadPair(D, false) &&
*II != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+
}
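
Most of the churn above is trailing-whitespace removal, but the two GetLocation hunks also drop an else that follows a return, leaving a flat if chain with identical behavior. A hypothetical before/after sketch of that cleanup (stand-in types, not the real MemDep code):

enum Ordering { Unordered, Monotonic, Other };

int classifyBefore(Ordering O) {
  if (O == Unordered) {
    return 1; // e.g. Ref
  } else if (O == Monotonic) { // the 'else' adds nesting but no meaning
    return 2; // e.g. ModRef
  }
  return 0;
}

int classifyAfter(Ordering O) {
  if (O == Unordered)
    return 1; // e.g. Ref
  if (O == Monotonic) // same control flow, flatter shape
    return 2; // e.g. ModRef
  return 0;
}

int main() { return classifyBefore(Monotonic) == classifyAfter(Monotonic) ? 0 : 1; }
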
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 745d8c60bb..48d7d05d78 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -84,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- arrayMap[0][F->begin()][0] = i++;
+ arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
@@ -125,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
<< currentPath->getCount() << "\n");
// setup the entry edge (normally path profiling doesn't care about this)
if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
+= currentPath->getCount();
for( ProfilePathEdgeIterator nextEdge = pev->begin(),
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 2daa7d4f6b..9626a48b9d 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>::
succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
if (Succ == End) {
- P[0] = BB;
+ P[(const BasicBlock*)0] = BB;
if (Mode & GetPathToExit) {
hasFoundPath = true;
BB = 0;
@@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Succ != End; ++Succ) {
Path P;
GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[0]) {
+ if (Dest && Dest != P[(const BasicBlock*)0]) {
AllEdgesHaveSameReturn = false;
}
- Dest = P[0];
+ Dest = P[(const BasicBlock*)0];
}
if (AllEdgesHaveSameReturn) {
if(EstimateMissingEdges(BB)) {
@@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Path P;
const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[0];
+ Dest = P[(const BasicBlock*)0];
if (!Dest) continue;
if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 07d83296bc..6ea915fdb0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4230,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
return Max ? Max : SE->getCouldNotCompute();
}
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+ ScalarEvolution *SE) const {
+ if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ return true;
+
+ if (!ExitNotTaken.ExitingBlock)
+ return false;
+
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+ && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ return true;
+ }
+ }
+ return false;
+}
+
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -6940,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
+
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ BackedgeTakenCounts.erase(I++);
+ }
+ else
+ ++I;
+ }
}
typedef DenseMap<const Loop *, std::string> VerifyMap;
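The new loop in forgetMemoizedResults erases map entries while walking the map, which is only safe with the post-increment idiom: erase(I++) hands the current slot to erase and steps past it first. A minimal standalone sketch of the same pattern (the key and value types are stand-ins, not the real SCEV classes):

  #include "llvm/ADT/DenseMap.h"

  static void eraseMatching(llvm::DenseMap<int *, int> &M) {
    typedef llvm::DenseMap<int *, int>::iterator iterator;
    for (iterator I = M.begin(), E = M.end(); I != E; ) {
      if (I->second > 0)
        M.erase(I++); // erase the current entry, then keep walking
      else
        ++I;
    }
  }

DenseMap::erase does not rehash, so iterators other than the erased one stay valid and the cached end iterator can be reused.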
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 72421a00c7..976cd87321 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
+ Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
return TCC_Free;
// Otherwise it's not a no-op.
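The fix swaps OpTy (the pointer operand's type) for Ty (the integer result type): a ptrtoint is only free when the produced integer is legal and wide enough to hold the pointer. A hypothetical standalone restatement of the corrected check, assuming a DataLayout for the target:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Type.h"

  // Ty is the integer result of the ptrtoint; the old code mistakenly tested
  // the pointer operand's type instead.
  static bool isFreePtrToInt(const llvm::DataLayout &DL, llvm::Type *Ty) {
    return DL.isLegalInteger(Ty->getScalarSizeInBits()) &&
           Ty->getScalarSizeInBits() >= DL.getPointerSizeInBits();
  }

On a target with 64-bit pointers and legal i64, a ptrtoint to i64 is free, while a ptrtoint to i32 truncates and is not.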
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 45b75df508..45dcc5e37e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, TD, Depth))
return true;
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 942346b44e..9dafe2a036 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -292,7 +292,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
Abbv->Add(BitCodeAbbrevOp(0));
continue;
}
-
+
Abbv->Add(BitCodeAbbrevOp(E, Data));
} else
Abbv->Add(BitCodeAbbrevOp(E));
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 4f19dd00e6..8bac6da892 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -60,7 +60,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
- // Insert constants and metadata that are named at module level into the slot
+ // Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
EnumerateNamedMetadata(M);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index d1ea027338..76ebe9aca9 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
# `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt
add_subdirectory(IR)
+add_subdirectory(IRReader)
add_subdirectory(CodeGen)
add_subdirectory(Bitcode)
add_subdirectory(Transforms)
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 58fe2ed9d3..8d15c069c6 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
DwarfCompileUnit.cpp
DwarfDebug.cpp
DwarfException.cpp
+ ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
Win64Exception.cpp
)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4ded2818ed..57e0acda89 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,6 +112,17 @@ DIE::~DIE() {
delete Children[i];
}
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const {
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
O << "Size: " << Size << "\n";
}
- const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
IndentCount += 2;
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -313,7 +324,7 @@ void DIEEntry::print(raw_ostream &O) {
///
unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
if (!Size) {
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
}
@@ -332,7 +343,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
}
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 35d7959ac1..18b6966e18 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -66,7 +66,7 @@ namespace llvm {
/// Data - Raw data bytes for abbreviation.
///
- SmallVector<DIEAbbrevData, 8> Data;
+ SmallVector<DIEAbbrevData, 12> Data;
public:
DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
@@ -75,7 +75,7 @@ namespace llvm {
uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
uint16_t getChildrenFlag() const { return ChildrenFlag; }
- const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
void setTag(uint16_t T) { Tag = T; }
void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
@@ -133,7 +133,7 @@ namespace llvm {
/// Attribute values.
///
- SmallVector<DIEValue*, 32> Values;
+ SmallVector<DIEValue*, 12> Values;
// Private data for print()
mutable unsigned IndentCount;
@@ -150,8 +150,11 @@ namespace llvm {
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
const std::vector<DIE *> &getChildren() const { return Children; }
- const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+ const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
@@ -232,9 +235,10 @@ namespace llvm {
///
static unsigned BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
- if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
- if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
- if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
} else {
if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
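The SmallVector-to-SmallVectorImpl changes that recur throughout this commit all follow one pattern: accessors expose SmallVectorImpl<T>&, the size-erased base class of every SmallVector<T, N>, so the inline capacity N (retuned from 8 and 32 to 12 here) stays a private detail of the owning class. A minimal sketch of the pattern (Container and sum are illustrative names):

  #include "llvm/ADT/SmallVector.h"

  struct Container {
    llvm::SmallVector<int, 12> Data; // N can be retuned freely...
    const llvm::SmallVectorImpl<int> &get() const { return Data; } // ...here
  };

  static int sum(const llvm::SmallVectorImpl<int> &V) {
    int S = 0;
    for (unsigned i = 0, e = V.size(); i != e; ++i)
      S += V[i];
    return S; // callers never name the inline element count
  }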
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 01f15e52e8..1c743c2414 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
DwarfDebug *DW, DwarfUnits *DWU)
: UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(0) {
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 77bf6a9e50..2b180c6cc3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -87,6 +87,9 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
/// getLowerBoundDefault - Return the default lower bound for an array. If the
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
@@ -103,6 +106,7 @@ public:
unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -120,6 +124,7 @@ public:
return AccelTypes;
}
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 30bfa78989..585a92a92f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
// Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsSPDIE);
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
SPCU->addDie(SPDie);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -716,13 +721,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
if (!FirstCU)
FirstCU = NewCU;
- if (useSplitDwarf()) {
- // This should be a unique identifier when we want to build .dwp files.
- NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
- // Now construct the skeleton CU associated.
- constructSkeletonCU(N);
- }
-
InfoHolder.addUnit(NewCU);
CUMap.insert(std::make_pair(N, NewCU));
@@ -789,6 +787,14 @@ void DwarfDebug::beginModule() {
DIArray RetainedTypes = CUNode.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    // If we're splitting the DWARF out now that we've got the entire
+    // CU, construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+ CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(CUNode);
+ }
}
// Tell MMI that we have debug info.
@@ -1666,8 +1672,8 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Start the size with the size of abbreviation code.
Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Size the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1692,15 +1698,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Compute the size and offset of all the DIEs.
void DwarfUnits::computeSizeAndOffsets() {
- for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset((*I)->getCUDie(), Offset);
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
}
}
@@ -1757,8 +1767,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
dwarf::TagString(Abbrev->getTag()));
Asm->EmitULEB128(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Emit the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -1774,6 +1784,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
Asm->EmitInt32(Addr);
break;
}
@@ -1839,7 +1856,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
const MCSection *ASection,
const MCSymbol *ASectionSym) {
Asm->OutStreamer.SwitchSection(USection);
- for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
E = CUs.end(); I != E; ++I) {
CompileUnit *TheCU = *I;
DIE *Die = TheCU->getCUDie();
@@ -1871,6 +1888,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
}
}
+/// For a given compile unit DIE, returns the offset from the beginning of debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
+
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2255,7 +2285,7 @@ void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I) {
DotDebugLocEntry &Entry = *I;
@@ -2269,7 +2299,7 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getDataLayout().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
DotDebugLocEntry &Entry = *I;
@@ -2362,7 +2392,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
- for (SmallVector<const MCSymbol *, 8>::iterator
+ for (SmallVectorImpl<const MCSymbol *>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
@@ -2420,13 +2450,13 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
- for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
const MDNode *Node = *I;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
= InlineInfo.find(Node);
- SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
DISubprogram SP(Node);
StringRef LName = SP.getLinkageName();
StringRef Name = SP.getName();
@@ -2445,7 +2475,7 @@ void DwarfDebug::emitDebugInlineInfo() {
DwarfStrSectionSym);
Asm->EmitULEB128(Labels.size(), "Inline count");
- for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(LI->second->getOffset());
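Taken together, setDebugInfoOffset, getCUOffset, and the DW_FORM_ref_addr branch in emitDIE implement cross-CU references: DW_FORM_ref4 encodes an offset relative to the referencing compile unit, while DW_FORM_ref_addr encodes an offset from the start of the debug info section, so a reference into another CU must add that CU's accumulated section offset. A worked sketch with made-up offsets (not taken from a real object file):

  // Origin lives in a CU that starts 0x200 bytes into .debug_info and sits
  // 0x4c bytes into that CU.
  unsigned crossCURefAddr() {
    unsigned CUSectionOffset = 0x200; // Holder.getCUOffset(Origin's CU)
    unsigned DIEOffsetInCU   = 0x4c;  // Origin->getOffset(), CU-relative
    // DW_FORM_ref4 would emit just 0x4c; DW_FORM_ref_addr must emit the
    // section-relative address instead:
    return CUSectionOffset + DIEOffsetInCU; // 0x24c
  }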
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 771bc362cb..81e345e628 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -274,6 +274,10 @@ public:
/// \brief Returns the address pool.
AddrPool *getAddrPool() { return &AddressPool; }
+
+  /// \brief For a given compile unit DIE, returns the offset from the
+  /// beginning of the debug info.
+ unsigned getCUOffset(DIE *Die);
};
/// \brief Collects and handles dwarf debug information.
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 0000000000..a8fb66dcf1
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGCPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+ MCStreamer &OS = AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ // Put this in a custom .note section.
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+ .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getDataRel()));
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ }
+
+    // Stack information never changes across safe points! Only print info
+    // from the first call site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+ MD.getFunction().arg_size() - RegisteredArgs : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ddc7adab49..56aa3309d3 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -7,13 +7,13 @@ add_llvm_library(LLVMCodeGen
CalcSpillWeights.cpp
CallingConvLower.cpp
CodeGen.cpp
- CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DFAPacketizer.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
EdgeBundles.cpp
+ ErlangGC.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a33b672044..35ec68d00c 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
- initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
deleted file mode 100644
index 24518443a7..0000000000
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ /dev/null
@@ -1,423 +0,0 @@
-//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass that optimizes code placement and aligns loop
-// headers to target-specific alignment boundaries.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-STATISTIC(NumLoopsAligned, "Number of loops aligned");
-STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
-STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
-
-namespace {
- class CodePlacementOpt : public MachineFunctionPass {
- const MachineLoopInfo *MLI;
- const TargetInstrInfo *TII;
- const TargetLowering *TLI;
-
- public:
- static char ID;
- CodePlacementOpt() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineLoopInfo>();
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- bool HasFallthrough(MachineBasicBlock *MBB);
- bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
- void Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End);
- bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L);
- bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L);
- bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
- bool OptimizeIntraLoopEdges(MachineFunction &MF);
- bool AlignLoops(MachineFunction &MF);
- bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
- };
-
- char CodePlacementOpt::ID = 0;
-} // end anonymous namespace
-
-char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
-INITIALIZE_PASS(CodePlacementOpt, "code-placement",
- "Code Placement Optimizer", false, false)
-
-/// HasFallthrough - Test whether the given branch has a fallthrough, either as
-/// a plain fallthrough or as a fallthrough case of a conditional branch.
-///
-bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // This conditional branch has no fallthrough.
- if (FBB)
- return false;
- // An unconditional branch has no fallthrough.
- if (Cond.empty() && TBB)
- return false;
- // It has a fallthrough.
- return true;
-}
-
-/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
-/// This is called before major changes are begun to test whether it will be
-/// possible to complete the changes.
-///
-/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
-/// whenever possible.
-///
-bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
- // Conservatively ignore EH landing pads.
- if (MBB->isLandingPad()) return false;
-
- // Aggressively handle return blocks and similar constructs.
- if (MBB->succ_empty()) return true;
-
- // Ask the target's AnalyzeBranch if it can handle this block.
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- // Make sure the terminator is understood.
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // Ignore blocks which look like they might have EH-related control flow.
- // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
- // recognize the possibility of a control transfer through an unwind.
- // Such blocks contain EH_LABEL instructions, however they may be in the
- // middle of the block. Instead of searching for them, just check to see
- // if the CFG disagrees with AnalyzeBranch.
- if (1u + !Cond.empty() != MBB->succ_size())
- return false;
- // Make sure we have the option of reversing the condition.
- if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
- return false;
- return true;
-}
-
-/// Splice - Move the sequence of instructions [Begin,End) to just before
-/// InsertPt. Update branch instructions as needed to account for broken
-/// fallthrough edges and to take advantage of newly exposed fallthrough
-/// opportunities.
-///
-void CodePlacementOpt::Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End) {
- assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
- "Splice can't change the entry block!");
- MachineFunction::iterator OldBeginPrior = prior(Begin);
- MachineFunction::iterator OldEndPrior = prior(End);
-
- MF.splice(InsertPt, Begin, End);
-
- prior(Begin)->updateTerminator();
- OldBeginPrior->updateTerminator();
- OldEndPrior->updateTerminator();
-}
-
-/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
-/// to the loop top to the top of the loop so that they have a fall through.
-/// This can introduce a branch on entry to the loop, but it can eliminate a
-/// branch within the loop. See the @simple case in
-/// test/CodeGen/X86/loop_blocks.ll for an example of this.
-bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
-
- bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
-
- if (TopMBB == MF.begin() ||
- HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
- new_top:
- for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
- PE = TopMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
- if (Pred == TopMBB) continue;
- if (HasFallthrough(Pred)) continue;
- if (!L->contains(Pred)) continue;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (Pred == MF.begin())
- continue;
- if (!HasAnalyzableTerminator(Pred))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
- << " to top of loop.\n");
- Changed = true;
-
- // Move it and all the blocks that can reach it via fallthrough edges
- // exclusively, to keep existing fallthrough edges intact.
- MachineFunction::iterator Begin = Pred;
- MachineFunction::iterator End = llvm::next(Begin);
- while (Begin != MF.begin()) {
- MachineFunction::iterator Prior = prior(Begin);
- if (Prior == MF.begin())
- break;
- // Stop when a non-fallthrough edge is found.
- if (!HasFallthrough(Prior))
- break;
- // Stop if a block which could fall-through out of the loop is found.
- if (Prior->isSuccessor(End))
- break;
- // If we've reached the top, stop scanning.
- if (Prior == MachineFunction::iterator(TopMBB)) {
- // We know top currently has a fall through (because we just checked
- // it) which would be lost if we do the transformation, so it isn't
- // worthwhile to do the transformation unless it would expose a new
- // fallthrough edge.
- if (!Prior->isSuccessor(End))
- goto next_pred;
- // Otherwise we can stop scanning and proceed to move the blocks.
- break;
- }
- // If we hit a switch or something complicated, don't move anything
- // for this predecessor.
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
- break;
- // Ok, the block prior to Begin will be moved along with the rest.
- // Extend the range to include it.
- Begin = Prior;
- ++NumIntraMoved;
- }
-
- // Move the blocks.
- Splice(MF, TopMBB, Begin, End);
-
- // Update TopMBB.
- TopMBB = L->getTopBlock();
-
- // We have a new loop top. Iterate on it. We shouldn't have to do this
- // too many times if BranchFolding has done a reasonable job.
- goto new_top;
- next_pred:;
- }
- }
-
- // If the loop previously didn't exit with a fall-through and it now does,
- // we eliminated a branch.
- if (Changed &&
- !BotHasFallthrough &&
- HasFallthrough(L->getBottomBlock())) {
- ++NumIntraElim;
- }
-
- return Changed;
-}
-
-/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
-/// portion of the loop contiguous with the header. This usually makes the loop
-/// contiguous, provided that AnalyzeBranch can handle all the relevant
-/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
-/// for an example of this.
-bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
- MachineBasicBlock *BotMBB = L->getBottomBlock();
-
- // Determine a position to move orphaned loop blocks to. If TopMBB is not
- // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
- // to the top of the loop to avoid losing that fallthrough. Otherwise append
- // them to the bottom, even if it previously had a fallthrough, on the theory
- // that it's worth an extra branch to keep the loop contiguous.
- MachineFunction::iterator InsertPt =
- llvm::next(MachineFunction::iterator(BotMBB));
- bool InsertAtTop = false;
- if (TopMBB != MF.begin() &&
- !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
- HasFallthrough(BotMBB)) {
- InsertPt = TopMBB;
- InsertAtTop = true;
- }
-
- // Keep a record of which blocks are in the portion of the loop contiguous
- // with the loop header.
- SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
- for (MachineFunction::iterator I = TopMBB,
- E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
- ContiguousBlocks.insert(I);
-
- // Find non-contiguous blocks and fix them.
- if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
- for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- MachineBasicBlock *BB = *BI;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (!HasAnalyzableTerminator(BB))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
- continue;
-
- // If the layout predecessor is part of the loop, this block will be
- // processed along with it. This keeps them in their relative order.
- if (BB != MF.begin() &&
- L->contains(prior(MachineFunction::iterator(BB))))
- continue;
-
- // Check to see if this block is already contiguous with the main
- // portion of the loop.
- if (!ContiguousBlocks.insert(BB))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
- << " to be contiguous with loop.\n");
- Changed = true;
-
- // Process this block and all loop blocks contiguous with it, to keep
- // them in their relative order.
- MachineFunction::iterator Begin = BB;
- MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
- for (; End != MF.end(); ++End) {
- if (!L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- ContiguousBlocks.insert(End);
- ++NumIntraMoved;
- }
-
- // If we're inserting at the bottom of the loop, and the code we're
- // moving originally had fall-through successors, bring the successors
- // up with the loop blocks to preserve the fall-through edges.
- if (!InsertAtTop)
- for (; End != MF.end(); ++End) {
- if (L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- if (!HasFallthrough(prior(End))) break;
- }
-
- // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
- // we don't need them anymore at this point.
- Splice(MF, InsertPt, Begin, End);
- }
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-/// This code takes the approach of making minor changes to the existing
-/// layout to fix specific loop-oriented problems. Also, it depends on
-/// AnalyzeBranch, which can't understand complex control instructions.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
-
- // Do optimization for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- // Do optimization for this loop.
- Changed |= EliminateUnconditionalJumpsToTop(MF, L);
- Changed |= MoveDiscontiguousLoopBlocks(MF, L);
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
- bool Changed = false;
-
- if (!TLI->shouldOptimizeCodePlacement())
- return Changed;
-
- // Do optimization for each loop in the function.
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- if (!(*I)->getParentLoop())
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- return Changed;
-}
-
-/// AlignLoops - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
- return false;
-
- unsigned Align = TLI->getPrefLoopAlignment();
- if (!Align)
- return false; // Don't care about loop alignment.
-
- bool Changed = false;
-
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- return Changed;
-}
-
-/// AlignLoop - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
- unsigned Align) {
- bool Changed = false;
-
- // Do alignment for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- L->getTopBlock()->setAlignment(Align);
- Changed = true;
- ++NumLoopsAligned;
-
- return Changed;
-}
-
-bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
- MLI = &getAnalysis<MachineLoopInfo>();
- if (MLI->empty())
- return false; // No loops.
-
- TLI = MF.getTarget().getTargetLowering();
- TII = MF.getTarget().getInstrInfo();
-
- bool Changed = OptimizeIntraLoopEdges(MF);
-
- Changed |= AlignLoops(MF);
-
- return Changed;
-}
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 0000000000..8a1e2d9c99
--- /dev/null
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGC : public GCStrategy {
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+ public:
+ ErlangGC();
+ bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+ };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+
+ if (MI->getDesc().isCall()) {
+
+ // Do not treat tail call sites as safe points.
+ if (MI->getDesc().isTerminator())
+ continue;
+
+        // Code copied from VisitCallPoint(...)
+ MachineBasicBlock::iterator RAI = MI; ++RAI;
+ MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+ FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+ }
+
+ return false;
+}
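Both halves of the plugin register under the name "erlang", so a function opts in by naming that strategy. A minimal sketch of selecting it from C++ (equivalent to writing gc "erlang" on the define in textual IR; useErlangGC is an illustrative name):

  #include "llvm/IR/Function.h"

  void useErlangGC(llvm::Function &F) {
    // Ties F to the ErlangGC strategy above and to the frametable printer in
    // ErlangGCPrinter.cpp, both looked up by this registry name.
    F.setGC("erlang");
  }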
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 3b28e6afb6..7793e96c35 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx) {
+ SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 3b09c6b779..cd948e24a6 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1061,7 +1061,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
// Align this block if the layout predecessor's edge into this block is
- // cold relative to the block. When this is true, othe predecessors make up
+ // cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5e04f2d8a3..04321f3292 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
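estimateStackSize rounds offsets up to alignment boundaries in two equivalent ways: a divide/multiply form for per-object alignment and a mask form for the final frame size (valid because stack alignments are powers of two). A small illustration:

  unsigned roundUpDiv(unsigned Offset, unsigned Align) {
    return (Offset + Align - 1) / Align * Align; // works for any Align > 0
  }

  unsigned roundUpMask(unsigned Offset, unsigned Align) {
    unsigned AlignMask = Align - 1;              // Align must be a power of 2
    return (Offset + AlignMask) & ~AlignMask;
  }

  // roundUpDiv(13, 8) == roundUpMask(13, 8) == 16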
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index a777f52cb2..1af00e84a6 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 103b058c13..c872355e37 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -2182,7 +2182,7 @@ public:
/// Callback to select the highest priority node from the ready Q.
virtual SUnit *pickNode(bool &IsTopNode) {
if (ReadyQ.empty()) return NULL;
- pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
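Qualifying the call matters because unqualified pop_heap only resolves through argument-dependent lookup when the container's iterator is a class type in namespace std; on implementations where vector<T>::iterator is a plain pointer, lookup fails. A minimal illustration of the portable spelling:

  #include <algorithm>
  #include <vector>

  // Q must already satisfy the heap property; pop_heap moves the top element
  // to the back so pop_back can remove it.
  int popTop(std::vector<int> &Q) {
    std::pop_heap(Q.begin(), Q.end());
    int Top = Q.back();
    Q.pop_back();
    return Top;
  }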
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 6e1cad3252..1af65c88ab 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -39,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
cl::desc("Disable pre-register allocation tail duplication"));
static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
- cl::Hidden, cl::desc("Disable the probability-driven block placement, and "
- "re-enable the old code placement pass"));
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
@@ -149,10 +146,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
return applyDisable(TargetID, DisableEarlyTailDup);
if (StandardID == &MachineBlockPlacementID)
- return applyDisable(TargetID, DisableCodePlace);
-
- if (StandardID == &CodePlacementOptID)
- return applyDisable(TargetID, DisableCodePlace);
+ return applyDisable(TargetID, DisableBlockPlacement);
if (StandardID == &StackSlotColoringID)
return applyDisable(TargetID, DisableSSC);
@@ -742,16 +736,7 @@ bool TargetPassConfig::addGCPasses() {
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- AnalysisID PassID = 0;
- if (!DisableBlockPlacement) {
- // MachineBlockPlacement is a new pass which subsumes the functionality of
- // CodPlacementOpt. The old code placement pass can be restored by
- // disabling block placement, but eventually it will be removed.
- PassID = addPass(&MachineBlockPlacementID);
- } else {
- PassID = addPass(&CodePlacementOptID);
- }
- if (PassID) {
+ if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
addPass(&MachineBlockPlacementStatsID);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32830f57bc..5a168dd244 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -55,7 +55,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog",
"Prologue/Epilogue Insertion & Frame Finalization",
false, false)
-STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -101,7 +100,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -548,9 +547,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
!RegInfo->needsStackRealignment(Fn)) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -593,7 +594,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -615,7 +616,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -631,9 +632,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// stack pointer.
if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
!RegInfo->useFPForScavengingIndex(Fn))) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -816,14 +819,20 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- unsigned VirtReg = 0;
- unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = llvm::next(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
MachineOperand &MO = MI->getOperand(i);
@@ -833,29 +842,37 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- ++NumVirtualFrameRegs;
-
- // Have we already allocated a scratch register for this virtual?
- if (Reg != VirtReg) {
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MI->getOperand(i).isDef() &&
- "frame index virtual missing def!");
- // Scavenge a new scratch register
- VirtReg = Reg;
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- ++NumScavengedRegs;
- }
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(ScratchReg);
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setUsed(ScratchReg);
}
}
- RS->forward(I);
- ++I;
+
+      // If the scavenger needed to use one of its spill slots, the
+      // spill code will have been inserted between I and J. This is a
+      // problem because we need the spill code before I; move I to just
+      // prior to J.
+ if (I != llvm::prior(J)) {
+ BB->splice(J, BB, I++);
+ RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
+ } else
+ ++I;
}
}
}
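The splice at the end of that loop exists because scavengeRegister was handed J as its insertion point, so any emergency spill lands between I and J even though the saved value must be stored before I executes. A schematic of the reordering (illustrative, not real instructions):

  // Before BB->splice(J, BB, I++):
  //   I:     instruction whose virtual register got the scratch register
  //   spill: store inserted by scavengeRegister (just before J)
  //   J:     next original instruction
  // After:
  //   spill: store of the scratch register's old contents
  //   I:     instruction using the scratch register
  //   J:     next original instruction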
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 6da901f81d..55a66ba548 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -45,9 +45,11 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const {
}
void RegScavenger::initRegState() {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
// All registers started out unused.
RegsAvailable.set();
@@ -121,10 +123,13 @@ void RegScavenger::forward() {
MachineInstr *MI = MBBI;
- if (MI == ScavengeRestore) {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
}
if (MI->isDebugValue())
@@ -145,7 +150,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -170,7 +175,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -360,37 +365,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
- assert(ScavengedReg == 0 &&
- "Scavenger slot is live, unable to scavenge another register!");
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+    // We need to scavenge a register but have no spill slot; the target
+    // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
// Avoid infinite regress
- ScavengedReg = SReg;
+ Scavenged[SI].Reg = SReg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
// Spill the scavenged register before I.
- assert(ScavengingFrameIndex >= 0 &&
+ assert(Scavenged[SI].FrameIndex >= 0 &&
"Cannot scavenge register without an emergency spill slot!");
- TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
MachineBasicBlock::iterator II = prior(I);
unsigned FIOperandNum = getFrameIndexOperandNum(II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
II = prior(UseMI);
FIOperandNum = getFrameIndexOperandNum(II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
- ScavengeRestore = prior(UseMI);
+ Scavenged[SI].Restore = prior(UseMI);
// Doing this here leads to infinite regress.
- // ScavengedReg = SReg;
- ScavengedRC = RC;
+ // Scavenged[SI].Reg = SReg;
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 716fb93b29..ff98a04c5b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
DAG.getSetCC(N->getDebugLoc(),
TLI.getSetCCResultType(VT),
@@ -5835,14 +5835,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // No FP constant should be created after legalization, as the Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ //
+ // We don't need to test this condition for transformations like the
+ // following, as the DAG being transformed implies it is legal to take an
+ // FP constant as an operand.
+ //
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ //
+ bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
// If allowed, fold (fadd (fneg x), x) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
return DAG.getConstantFP(0.0, VT);
}
// If allowed, fold (fadd x, (fneg x)) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
return DAG.getConstantFP(0.0, VT);
}
@@ -5944,7 +5955,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::FADD) {
+ if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul 3.0, x)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
@@ -5954,7 +5965,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N1.getOpcode() == ISD::FADD) {
+ if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul 3.0, x)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
@@ -5965,7 +5976,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
- if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ if (AllowNewFpConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
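
The distinction the comment above draws: folds such as (fadd (fadd x, x), x) -> (fmul 3.0, x) materialize a brand-new FP immediate and are therefore gated on AllowNewFpConst, whereas (fadd (fmul c, x), x) -> (fmul c+1, x) only rewrites a constant the DAG already contains. A small standalone check of the guard and the underlying identity (plain C++, illustrative names):

#include <cassert>

enum Level { BeforeLegalizeDAG = 0, AfterLegalizeDAG = 1 };

// Mirrors: bool AllowNewFpConst = (Level < AfterLegalizeDAG);
bool allowNewFpConst(Level L) { return L < AfterLegalizeDAG; }

// (fadd (fmul c, x), x) == (fmul c+1, x): the fold reuses the existing
// constant c, so it needs no AllowNewFpConst guard.
double originalFAdd(double c, double x) { return c * x + x; }
double foldedFMul(double c, double x) { return (c + 1.0) * x; }

int main() {
  assert(allowNewFpConst(BeforeLegalizeDAG));
  assert(!allowNewFpConst(AfterLegalizeDAG));
  assert(originalFAdd(3.0, 2.0) == foldedFMul(3.0, 2.0));  // 8.0, exactly
}
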
@@ -6811,9 +6823,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
MVT::Other, Chain, Tmp, N2);
}
- // visitXOR has changed XOR's operands.
- Op0 = TheXor->getOperand(0);
- Op1 = TheXor->getOperand(1);
+ // visitXOR has changed XOR's operands or replaced the XOR completely;
+ // bail out.
+ return SDValue(N, 0);
}
}
@@ -7699,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-/// Returns the base pointer and an integer offset from that object.
-static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
- if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- SDValue Base = Ptr->getOperand(0);
- return std::make_pair(Base, Offset);
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
}
- return std::make_pair(Ptr, 0);
-}
+ /// Parses the tree in Ptr for base, index, and offset addresses.
+ static BaseIndexOffset match(SDValue Ptr) {
+ bool IsIndexSignExt = false;
+
+ // Just Base or possibly anything else.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Base + offset.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
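
BaseIndexOffset::match is a short descent over the address computation: bare pointer, base + constant, base + index, or base + index + constant, skipping sign extensions where noted. A standalone sketch of the same decomposition over a toy expression tree (plain C++; Node and BIO are illustrative stand-ins for SDNodes, and Add nodes are assumed to have both children set):

#include <cassert>
#include <cstdint>

struct Node {
  enum Kind { Leaf, Add, SignExtend } K;
  const Node *L = nullptr, *R = nullptr;
  std::int64_t Imm = 0;   // value for constant leaves
  bool IsConst = false;
};

struct BIO {
  const Node *Base = nullptr, *Index = nullptr;
  std::int64_t Offset = 0;
  bool IsIndexSignExt = false;
};

BIO match(const Node *Ptr) {
  if (Ptr->K != Node::Add)
    return {Ptr, nullptr, 0, false};               // just a base
  if (Ptr->R->IsConst)
    return {Ptr->L, nullptr, Ptr->R->Imm, false};  // base + offset
  const Node *Base = Ptr->L, *IdxOff = Ptr->R;
  bool SExt = false;
  if (IdxOff->K == Node::SignExtend) {             // skip the sign extension
    IdxOff = IdxOff->L;
    SExt = true;
  }
  if (IdxOff->K != Node::Add)
    return {Base, IdxOff, 0, SExt};                // base + index, no offset
  const Node *Index = IdxOff->L, *Off = IdxOff->R;
  if (!Off->IsConst)
    return {Ptr, nullptr, 0, SExt};                // opaque: treat Ptr as base
  if (Index->K == Node::SignExtend) {
    Index = Index->L;
    SExt = true;
  } else
    SExt = false;
  return {Base, Index, Off->Imm, SExt};            // base + index + offset
}

int main() {
  Node C{Node::Leaf}; C.IsConst = true; C.Imm = 8;
  Node B{Node::Leaf}, I{Node::Leaf};
  Node Inner{Node::Add}; Inner.L = &I; Inner.R = &C;
  Node Top{Node::Add};   Top.L = &B;   Top.R = &Inner;
  BIO R = match(&Top);
  assert(R.Base == &B && R.Index == &I && R.Offset == 8 && !R.IsIndexSignExt);
}
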
@@ -7755,16 +7833,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
- // This holds the base pointer and the offset in bytes from the base pointer.
- std::pair<SDValue, int64_t> BasePtr =
- GetPointerBaseAndOffset(St->getBasePtr());
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
- if (!BasePtr.first.getNode())
+ if (!BasePtr.Base.getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.first.getOpcode() == ISD::UNDEF)
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
return false;
// Save the LoadSDNodes that we find in the chain.
@@ -7786,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// Find the base pointer and offset for this memory node.
- std::pair<SDValue, int64_t> Ptr =
- GetPointerBaseAndOffset(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
- if (Ptr.first.getNode() != BasePtr.first.getNode())
+ if (!Ptr.equalBaseIndex(BasePtr))
break;
// Check that the alignment is the same.
@@ -7816,7 +7893,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
// Find the next memory operand in the chain. If the next operand in the
// chain is a store then move up and continue the scan with the next
@@ -8013,7 +8090,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
- SDValue LdBasePtr;
+ BaseIndexOffset LdBasePtr;
for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
@@ -8039,21 +8116,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Ld->getMemoryVT() != MemVT)
break;
- std::pair<SDValue, int64_t> LdPtr =
- GetPointerBaseAndOffset(Ld->getBasePtr());
-
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
- if (LdBasePtr.getNode()) {
+ if (LdBasePtr.Base.getNode()) {
// The base ptr must be the same.
- if (LdPtr.first != LdBasePtr)
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
break;
} else {
// Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr.first;
+ LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
}
if (LoadNodes.size() < 2)
@@ -8978,12 +9053,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs are of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ DebugLoc dl = N->getDebugLoc();
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
EVT SmallVT = V->getOperand(1).getValueType();
- if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
// Only handle cases where both indexes are constants with the same type.
@@ -8996,30 +9091,18 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
- // indices are equal => V1
+ // indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ N->getOperand(0).getValueType(),
+ V->getOperand(0)), N->getOperand(1));
}
}
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
- }
-
return SDValue();
}
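
The reordered EXTRACT_SUBVECTOR combine tries the concat case before looking through bitcasts: when the extract index is a multiple of the result vector length, (extract_subvec (concat V1, V2, ...), Idx) is exactly one of the concat operands. A toy standalone version (plain C++, illustrative names):

#include <cassert>
#include <cstddef>
#include <vector>

// Model a concat as a list of equally sized operand vectors; the extract
// simply selects the operand covering lane Idx.
std::vector<int> extractFromConcat(const std::vector<std::vector<int>> &Ops,
                                   std::size_t Idx) {
  std::size_t NumElems = Ops.front().size();  // all operands share one type
  assert(Idx % NumElems == 0 &&
         "IDX in concat is not a multiple of the result vector length.");
  return Ops[Idx / NumElems];
}

int main() {
  std::vector<std::vector<int>> Concat = {{0, 1}, {2, 3}, {4, 5}};
  assert((extractFromConcat(Concat, 4) == std::vector<int>{4, 5}));
}
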
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 10e2dc6149..9ac738e507 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1183,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ assert (MaterialReg != 0 && "Unable to materialize imm.");
+ if (MaterialReg == 0) return 0;
}
return FastEmit_rr(VT, VT, Opcode,
Op0, Op0IsKill,
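
The added guard is belt-and-braces: it asserts in +Asserts builds, and when assertions are compiled out it still fails soft by returning 0, FastISel's "no register" sentinel, rather than emitting an instruction with a bogus register. A minimal model of the pattern (plain C++, illustrative names):

#include <cassert>

// 0 plays the role of FastISel's "no register" sentinel.
unsigned materializeImm(bool CanMaterialize) {
  unsigned MaterialReg = CanMaterialize ? 42u : 0u;
  assert(MaterialReg != 0 && "Unable to materialize imm.");
  if (MaterialReg == 0)
    return 0;  // release builds: report failure instead of miscompiling
  return MaterialReg;
}

int main() { return materializeImm(true) == 42 ? 0 : 1; }
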
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index e26d1656e8..b6436bf427 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
SDValue &OpEntry = PromotedIntegers[Op];
assert(OpEntry.getNode() == 0 && "Node is already promoted!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
SDValue &OpEntry = SoftenedFloats[Op];
assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = ScalarizedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
@@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
@@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
@@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = WidenedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node already widened!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
@@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
return true;
}
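
Each Set* hook above copies the original node's ordering onto its replacement so the source-order number survives type legalization. The common shape, as a standalone sketch (a plain C++ map stands in for SDNodeOrdering; names are illustrative):

#include <cassert>
#include <unordered_map>

using NodeId = int;
std::unordered_map<NodeId, unsigned> OrderMap;

void assignOrdering(NodeId N, unsigned O) { OrderMap[N] = O; }
unsigned getOrdering(NodeId N) {
  auto It = OrderMap.find(N);
  return It == OrderMap.end() ? 0 : It->second;
}

void setPromotedInteger(NodeId Op, NodeId Result) {
  // ... record Result as the promoted form of Op ...
  assignOrdering(Result, getOrdering(Op));  // propagate node ordering
}

int main() {
  assignOrdering(1, 7);      // Op was the 7th node in source order
  setPromotedInteger(1, 2);  // its replacement inherits that position
  assert(getOrdering(2) == 7);
}
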
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index d2269f8acc..7e7b8974be 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -33,8 +33,10 @@ class SDNodeOrdering {
public:
SDNodeOrdering() {}
- void add(const SDNode *Node, unsigned O) {
- OrderMap[Node] = O;
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
}
void remove(const SDNode *Node) {
DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
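
The new add() keeps the smallest nonzero order a node has ever been assigned instead of letting later assignments overwrite it. Standalone sketch (plain C++, illustrative names; since orders are unsigned, the check below is equivalent to the diff's OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)):

#include <cassert>
#include <map>

std::map<int, unsigned> OrderMap;

void add(int Node, unsigned NewOrder) {
  unsigned &OldOrder = OrderMap[Node];  // default-constructs to 0 (= unset)
  if (OldOrder == 0 || NewOrder < OldOrder)
    OldOrder = NewOrder;
}

int main() {
  add(1, 5);
  add(1, 9);   // a later, larger order is ignored
  add(1, 3);   // a smaller order wins
  assert(OrderMap[1] == 3);
}
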
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 35707e86ce..64244313a3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- if (ISD::isZEXTLoad(Op.getNode())) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
@@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
break;
}
- // Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
- unsigned ExtType = LD->getExtensionType();
- switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp;
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fall through.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
}
}
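
Both fixes above hinge on the same fact: a load node produces two results, the loaded value (result 0) and the chain token (result 1), and the value-tracking logic must only fire for result 0. A toy model of the check (plain C++, illustrative names):

#include <cassert>

struct SDValueModel {
  int NodeId;
  unsigned ResNo;  // which result of the node this value refers to
};

// Known-bits / sign-bit reasoning about an extending load applies only to
// the loaded value, never to the chain result.
bool extLoadFactsApply(const SDValueModel &V, bool IsExtLoad) {
  return IsExtLoad && V.ResNo == 0;
}

int main() {
  assert(extLoadFactsApply({7, 0}, true));   // loaded value: ok
  assert(!extLoadFactsApply({7, 1}, true));  // chain token: no value bits
}
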
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 33d100eb3a..ce40cd6a0c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4914,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isOperationLegalOrCustom(ISD::FMA, VT) &&
TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 39a1f8a3d0..eeea9e4cfc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TargetSubtargetInfo &ST =
const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -784,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() {
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
- if (ResNode)
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
ReplaceUses(Node, ResNode);
+ }
// If after the replacement this node is not used any more,
// remove this dead node.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 320128a999..c5bbba3ffc 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -29,6 +29,7 @@
#define DEBUG_TYPE "spillplacement"
#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index ec44b8cb59..a789a2596d 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -67,14 +67,14 @@ DisableColoring("no-stack-coloring",
/// code. If this flag is enabled, we try to save the user.
static cl::opt<bool>
ProtectFromEscapedAllocas("protect-from-escaped-allocas",
- cl::init(false), cl::Hidden,
- cl::desc("Do not optimize lifetime zones that are broken"));
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
-STATISTIC(EscapedAllocas,
- "Number of allocas that escaped the lifetime region");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
//===----------------------------------------------------------------------===//
// StackColoring Pass
@@ -577,7 +577,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
SlotIndex Index = Indexes->getInstructionIndex(I);
LiveInterval *Interval = Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
- "Found instruction usage outside of live range.");
+ "Found instruction usage outside of live range.");
}
#endif
@@ -741,9 +741,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
SlotSizeSorter(MFI));
- bool Chanded = true;
- while (Chanded) {
- Chanded = false;
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
for (unsigned I = 0; I < NumSlots; ++I) {
if (SortedSlots[I] == -1)
continue;
@@ -760,7 +760,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Merge disjoint slots.
if (!First->overlaps(*Second)) {
- Chanded = true;
+ Changed = true;
First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3c346766ad..f42bdbd276 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -710,7 +710,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
- BenefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9a9ddc9b48..3bdca4c640 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -742,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
- if (Kind.isThreadLocal())
- return ".tls$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index f31f67d58c..783bfa1c1a 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return DefMI->isTransient() ? 0 : 1;
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc, like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index 2efbfd1f92..7dff9ff49a 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
void DWARFDebugArangeSet::clear() {
Offset = -1U;
- std::memset(&Header, 0, sizeof(Header));
+ std::memset(&HeaderData, 0, sizeof(Header));
ArangeDescriptors.clear();
}
@@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// descriptor on the target system. This header is followed by a series
// of tuples. Each tuple consists of an address and a length, each in
// the size appropriate for an address on the target architecture.
- Header.Length = data.getU32(offset_ptr);
- Header.Version = data.getU16(offset_ptr);
- Header.CuOffset = data.getU32(offset_ptr);
- Header.AddrSize = data.getU8(offset_ptr);
- Header.SegSize = data.getU8(offset_ptr);
+ HeaderData.Length = data.getU32(offset_ptr);
+ HeaderData.Version = data.getU16(offset_ptr);
+ HeaderData.CuOffset = data.getU32(offset_ptr);
+ HeaderData.AddrSize = data.getU8(offset_ptr);
+ HeaderData.SegSize = data.getU8(offset_ptr);
// Perform basic validation of the header fields.
- if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) ||
- (Header.AddrSize != 4 && Header.AddrSize != 8)) {
+ if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) ||
+ (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) {
clear();
return false;
}
@@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// size of an address). The header is padded, if necessary, to the
// appropriate boundary.
const uint32_t header_size = *offset_ptr - Offset;
- const uint32_t tuple_size = Header.AddrSize * 2;
+ const uint32_t tuple_size = HeaderData.AddrSize * 2;
uint32_t first_tuple_offset = 0;
while (first_tuple_offset < header_size)
first_tuple_offset += tuple_size;
@@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
Descriptor arangeDescriptor;
assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
- assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize);
+ assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize);
while (data.isValidOffset(*offset_ptr)) {
- arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize);
- arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize);
+ arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+ arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
// Each set of tuples is terminated by a 0 for the address and 0
// for the length.
@@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
- Header.Length, Header.Version)
+ HeaderData.Length, HeaderData.Version)
<< format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
- Header.CuOffset, Header.AddrSize, Header.SegSize);
+ HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize);
- const uint32_t hex_width = Header.AddrSize * 2;
+ const uint32_t hex_width = HeaderData.AddrSize * 2;
for (DescriptorConstIter pos = ArangeDescriptors.begin(),
end = ArangeDescriptors.end(); pos != end; ++pos)
OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
@@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
std::find_if(ArangeDescriptors.begin(), end, // Range
DescriptorContainsAddress(address)); // Predicate
if (pos != end)
- return Header.CuOffset;
+ return HeaderData.CuOffset;
return -1U;
}
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index 9a2a6d0f00..d76867615a 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -48,7 +48,7 @@ private:
typedef DescriptorColl::const_iterator DescriptorConstIter;
uint32_t Offset;
- Header Header;
+ Header HeaderData;
DescriptorColl ArangeDescriptors;
public:
@@ -58,11 +58,11 @@ public:
bool extract(DataExtractor data, uint32_t *offset_ptr);
void dump(raw_ostream &OS) const;
- uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; }
- uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; }
+ uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+ uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
uint32_t findAddress(uint64_t address) const;
uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
- const struct Header &getHeader() const { return Header; }
+ const struct Header &getHeader() const { return HeaderData; }
const Descriptor *getDescriptor(uint32_t i) const {
if (i < ArangeDescriptors.size())
return &ArangeDescriptors[i];
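
The rename matters because the old declaration "Header Header;" made the member hide the nested type name in the rest of the class, forcing elaborated spellings such as "const struct Header &" in getHeader() and making uses like sizeof(Header) easy to misread. With HeaderData the plain type name stays usable; a minimal sketch of the fixed shape (plain C++, illustrative fields):

#include <cstring>

struct ArangeSetModel {
  struct Header {
    unsigned Length = 0;
    unsigned short Version = 0;
  };
  Header HeaderData;
  // The type name "Header" is no longer shadowed by a member, so it can be
  // used directly, without the "struct" keyword.
  void clear() { std::memset(&HeaderData, 0, sizeof(Header)); }
  const Header &getHeader() const { return HeaderData; }
};

int main() {
  ArangeSetModel S;
  S.clear();
  return S.getHeader().Length;  // 0
}
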
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index b077eb5e38..f79862d606 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -186,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
Range range(address);
RangeCollIterator begin = Aranges.begin();
RangeCollIterator end = Aranges.end();
- RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan);
+ RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
return pos->Offset;
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 3d59d251a0..906a3a3fda 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
if (isa<UndefValue>(C)) {
GenericValue Result;
switch (C->getType()->getTypeID()) {
+ default:
+ break;
case Type::IntegerTyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
@@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// with the correct bit width.
Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
break;
- default:
+ case Type::VectorTyID:
+ // If the whole vector is 'undef', just reserve memory for the value.
+ const VectorType* VTy = cast<VectorType>(C->getType());
+ const Type *ElemTy = VTy->getElementType();
+ unsigned int elemNum = VTy->getNumElements();
+ Result.AggregateVal.resize(elemNum);
+ if (ElemTy->isIntegerTy())
+ for (unsigned int i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal =
+ APInt(ElemTy->getPrimitiveSizeInBits(), 0);
break;
}
return Result;
@@ -825,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else
llvm_unreachable("Unknown constant pointer type!");
break;
+ case Type::VectorTyID: {
+ unsigned elemNum;
+ Type* ElemTy;
+ const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ const ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C);
+
+ if (CDV) {
+ elemNum = CDV->getNumElements();
+ ElemTy = CDV->getElementType();
+ } else if (CV || CAZ) {
+ VectorType* VTy = cast<VectorType>(C->getType());
+ elemNum = VTy->getNumElements();
+ ElemTy = VTy->getElementType();
+ } else {
+ llvm_unreachable("Unknown constant vector type!");
+ }
+
+ Result.AggregateVal.resize(elemNum);
+ // Check if vector holds floats.
+ if(ElemTy->isFloatTy()) {
+ if (CAZ) {
+ GenericValue floatZero;
+ floatZero.FloatVal = 0.f;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ floatZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].FloatVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToFloat();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i);
+
+ break;
+ }
+ // Check if vector holds doubles.
+ if (ElemTy->isDoubleTy()) {
+ if (CAZ) {
+ GenericValue doubleZero;
+ doubleZero.DoubleVal = 0.0;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ doubleZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].DoubleVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToDouble();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i);
+
+ break;
+ }
+ // Check if vector holds integers.
+ if (ElemTy->isIntegerTy()) {
+ if (CAZ) {
+ GenericValue intZero;
+ intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ intZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].IntVal = cast<ConstantInt>(
+ CV->getOperand(i))->getValue();
+ else {
+ Result.AggregateVal[i].IntVal =
+ APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0);
+ }
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal = APInt(
+ CDV->getElementType()->getPrimitiveSizeInBits(),
+ CDV->getElementAsInteger(i));
+
+ break;
+ }
+ llvm_unreachable("Unknown constant vector element type!");
+ }
+ break;
+
default:
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
@@ -866,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ break;
case Type::IntegerTyID:
StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
break;
@@ -885,8 +994,19 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
*((PointerTy*)Ptr) = Val.PointerVal;
break;
- default:
- dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ case Type::VectorTyID:
+ for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) {
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal;
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
+ if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
+ unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
+ StoreIntToMemory(Val.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr + numOfBytes*i, numOfBytes);
+ }
+ }
+ break;
}
if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
@@ -951,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
Result.IntVal = APInt(80, y);
break;
}
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(Ty);
+ const Type *ElemT = VT->getElementType();
+ const unsigned numElems = VT->getNumElements();
+ if (ElemT->isFloatTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].FloatVal = *((float*)Ptr+i);
+ }
+ if (ElemT->isDoubleTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i);
+ }
+ if (ElemT->isIntegerTy()) {
+ GenericValue intZero;
+ const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth();
+ intZero.IntVal = APInt(elemBitWidth, 0);
+ Result.AggregateVal.resize(numElems, intZero);
+ for (unsigned i = 0; i < numElems; ++i)
+ LoadIntFromMemory(Result.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8);
+ }
+ break;
+ }
default:
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
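
The new VectorTyID store case writes one lane at a time at a stride of ceil(bitwidth/8) bytes from the base pointer, leaving the existing endian fix-up after the switch to do any byte swapping. A standalone sketch of the integer-lane path (plain C++; the memcpy below assumes a little-endian host):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

void storeIntVector(const std::vector<std::uint32_t> &Lanes,
                    std::uint8_t *Ptr, unsigned BitWidth) {
  unsigned NumBytes = (BitWidth + 7) / 8;  // bytes per lane, rounded up
  for (std::size_t i = 0; i < Lanes.size(); ++i)
    std::memcpy(Ptr + NumBytes * i, &Lanes[i], NumBytes);  // lane i low bytes
}

int main() {
  std::uint8_t Buf[8] = {};
  storeIntVector({0x0102, 0x0304}, Buf, 16);
  assert(Buf[0] == 0x02 && Buf[1] == 0x01 && Buf[2] == 0x04 && Buf[3] == 0x03);
}
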
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index ec4f7f6813..526c04e082 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1187,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
++VAList.UIntPairVal.second;
}
+void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+
+ Type *Ty = I.getType();
+ const unsigned indx = unsigned(Src2.IntVal.getZExtValue());
+
+ if(Src1.AggregateVal.size() > indx) {
+ switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Unhandled destination type for extractelement instruction: "
+ << *Ty << "\n";
+ llvm_unreachable(0);
+ break;
+ case Type::IntegerTyID:
+ Dest.IntVal = Src1.AggregateVal[indx].IntVal;
+ break;
+ case Type::FloatTyID:
+ Dest.FloatVal = Src1.AggregateVal[indx].FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal;
+ break;
+ }
+ } else {
+ dbgs() << "Invalid index in extractelement instruction\n";
+ }
+
+ SetValue(&I, Dest, SF);
+}
+
GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
ExecutionContext &SF) {
switch (CE->getOpcode()) {
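
visitExtractElementInst reads one lane of the operand's AggregateVal, switching on the element type and diagnosing an out-of-range index. Its core, reduced to one element type as a standalone sketch (plain C++, illustrative names):

#include <cassert>
#include <vector>

double extractElement(const std::vector<double> &Vec, unsigned Idx) {
  // Mirrors the size check above; the real code prints
  // "Invalid index in extractelement instruction" instead of asserting.
  assert(Idx < Vec.size() && "invalid extractelement index");
  return Vec[Idx];
}

int main() {
  std::vector<double> V = {1.5, 2.5, 4.0};
  assert(extractElement(V, 2) == 4.0);
}
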
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index e95db2fc4e..2952d7eabe 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -178,6 +178,7 @@ public:
void visitAShr(BinaryOperator &I);
void visitVAArgInst(VAArgInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
void visitInstruction(Instruction &I) {
errs() << I << "\n";
llvm_unreachable("Instruction not interpretable yet!");
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index ed2bf05e90..2d828914cd 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -649,6 +649,13 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
}
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const {
+ llvm::AttrBuilder B;
+ B.addAttribute(Kind);
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+}
+
AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
AttributeSet Attrs) const {
if (!pImpl) return Attrs;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 0c7effb5ca..1abb656435 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -47,6 +47,16 @@ bool Constant::isNegativeZeroValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->isZero() && CFP->isNegative();
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+ if (SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
+ // We've already handled the true FP case; any other FP vectors can't represent -0.0.
+ if (getType()->isFPOrFPVectorTy())
+ return false;
+
// Otherwise, just use +0.0.
return isNullValue();
}
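
For vectors, isNegativeZeroValue now accepts a ConstantDataVector whose splat value is the FP constant -0.0, and the new isFPOrFPVectorTy() early-out keeps every other FP value away from the integer-style isNullValue() fallback. The lane-wise predicate, as a standalone sketch (plain C++, illustrative names):

#include <cassert>
#include <cmath>
#include <vector>

// A vector is a "negative zero value" when every lane is exactly -0.0.
bool isNegativeZeroSplat(const std::vector<double> &Lanes) {
  for (double D : Lanes)
    if (!(D == 0.0 && std::signbit(D)))  // -0.0 compares equal to 0.0
      return false;
  return !Lanes.empty();
}

int main() {
  assert(isNegativeZeroSplat({-0.0, -0.0}));
  assert(!isNegativeZeroSplat({0.0, -0.0}));  // a +0.0 lane breaks the splat
}
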
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 5ee36abc6b..0d18bed001 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -71,6 +71,16 @@ static MDNode *getNonCompileUnitScope(MDNode *N) {
return N;
}
+static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
+ StringRef Directory) {
+ assert(!Filename.empty() && "Unable to create file without name");
+ Value *Pair[] = {
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ };
+ return MDNode::get(VMContext, Pair);
+}
+
/// createCompileUnit - A CompileUnit provides an anchor for all debugging
/// information generated during this instance of compilation.
void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
@@ -93,13 +103,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ createFilePathPair(VMContext, Filename, Directory),
ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
MDString::get(VMContext, Producer),
- // isMain field can be removed when we remove the legacy debug info.
- ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
MDString::get(VMContext, Flags),
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
@@ -119,13 +125,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
- assert(!Filename.empty() && "Unable to create file without name");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
- NULL // TheCU
+ createFilePathPair(VMContext, Filename, Directory)
};
return DIFile(MDNode::get(VMContext, Elts));
}
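
createFilePathPair factors the (filename, directory) strings into one node that every descriptor references, replacing the pair of inline MDString fields used before; createFile and createCompileUnit now share it. A trivial standalone sketch of the helper's contract (plain C++, illustrative names):

#include <cassert>
#include <string>
#include <utility>

using FilePathPair = std::pair<std::string, std::string>;

FilePathPair makeFilePathPair(const std::string &Filename,
                              const std::string &Directory) {
  assert(!Filename.empty() && "Unable to create file without name");
  return {Filename, Directory};  // one shared node instead of two strings
}

int main() {
  FilePathPair P = makeFilePathPair("a.cpp", "/src");
  assert(P.first == "a.cpp" && P.second == "/src");
}
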
@@ -148,9 +150,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) {
// ,size, alignment, offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -171,9 +173,9 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
// offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ NULL, // File/directory name
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -190,9 +192,9 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, StringRef()), // Empty name.
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -210,9 +212,9 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ NULL, // Filename
NULL, //TheCU,
MDString::get(VMContext, Name),
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -227,9 +229,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base)
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
+ NULL, // Filename
NULL, //TheCU,
NULL,
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -248,9 +250,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
// References are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ NULL, // Filename
NULL, // TheCU,
NULL, // Name
- NULL, // Filename
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -268,9 +270,9 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
assert(Ty.Verify() && "Invalid typedef type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -288,9 +290,9 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
assert(FriendTy.Verify() && "Invalid friend type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ NULL,
Ty,
NULL, // Name
- Ty.getFile(),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -309,9 +311,9 @@ DIDerivedType DIBuilder::createInheritance(
// TAG_inheritance is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ NULL,
Ty,
NULL, // Name
- Ty.getFile(),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -330,9 +332,9 @@ DIDerivedType DIBuilder::createMemberType(
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -353,9 +355,9 @@ DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
Flags |= DIDescriptor::FlagStaticMember;
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
@@ -379,9 +381,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(File),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -406,9 +408,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
getNonCompileUnitScope(File),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -480,21 +482,23 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
}
/// createClassType - Create debugging information entry for a class.
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType DerivedFrom, DIArray Elements,
- MDNode *VTableHolder,
- MDNode *TemplateParams) {
+DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements,
+ MDNode *VTableHolder,
+ MDNode *TemplateParams) {
assert((!Context || Context.Verify()) &&
"createClassType should be called with a valid Context");
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -506,7 +510,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
VTableHolder,
TemplateParams
};
- DIType R(MDNode::get(VMContext, Elts));
+ DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.Verify() && "createClassType should return a verifiable DIType");
return R;
}
@@ -524,9 +528,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
// TAG_structure_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -551,9 +555,9 @@ DICompositeType DIBuilder::createUnionType(
// TAG_union_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -574,8 +578,8 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- MDString::get(VMContext, ""),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDString::get(VMContext, ""),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
@@ -598,9 +602,9 @@ DICompositeType DIBuilder::createEnumerationType(
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -622,9 +626,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
// TAG_array_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
NULL, //TheCU,
MDString::get(VMContext, ""),
- NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -645,9 +649,9 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
// A vector is an array type with the FlagVector flag applied.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
NULL, //TheCU,
MDString::get(VMContext, ""),
- NULL, //TheCU,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -725,29 +729,6 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
return DIDescriptor(MDNode::get(VMContext, Elts));
}
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType() {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType(DIFile F) {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = {
- GetTagConstant(VMContext, DW_TAG_base_type),
- TheCU,
- NULL,
- F
- };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
/// createForwardDecl - Create a temporary forward-declared type that
/// can be RAUW'd if the full type is seen.
DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
@@ -758,9 +739,9 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
// Create a temporary MDNode.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
+ F.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- F,
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -798,17 +779,18 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
return DISubrange(MDNode::get(VMContext, Elts));
}
-/// createGlobalVariable - Create a new descriptor for the specified global.
+/// \brief Create a new descriptor for the specified global.
DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val) {
+createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DIType Ty, bool isLocalToUnit,
+ Value *Val) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
NULL, // TheCU,
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
Ty,
@@ -822,6 +804,14 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
return DIGlobalVariable(Node);
}
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val) {
+ return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
+ Val);
+}
+
/// createStaticVariable - Create a new descriptor for the specified static
/// variable.
DIGlobalVariable DIBuilder::
@@ -920,12 +910,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ File.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -946,7 +935,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
// Create a named metadata so that we do not lose this mdnode.
if (isDefinition)
AllSubprograms.push_back(Node);
- return DISubprogram(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createFunction should return a valid DISubprogram");
+ return S;
}
/// createMethod - Create a new descriptor for the specified C++ method.
@@ -966,12 +957,11 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ F.getFileNode(),
getNonCompileUnitScope(Context),
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
- F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
@@ -991,7 +981,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
MDNode *Node = MDNode::get(VMContext, Elts);
if (isDefinition)
AllSubprograms.push_back(Node);
- return DISubprogram(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createMethod should return a valid DISubprogram");
+ return S;
}
/// createNameSpace - This creates a new descriptor for a namespace
@@ -1000,9 +992,9 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNo) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
MDString::get(VMContext, Name),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
DINameSpace R(MDNode::get(VMContext, Elts));
@@ -1017,8 +1009,8 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
DIFile File) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- Scope,
- File
+ File.getFileNode(),
+ Scope
};
DILexicalBlockFile R(MDNode::get(VMContext, Elts));
assert(
@@ -1033,10 +1025,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
static unsigned int unique_id = 0;
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File.getFileNode(),
getNonCompileUnitScope(Scope),
ConstantInt::get(Type::getInt32Ty(VMContext), Line),
ConstantInt::get(Type::getInt32Ty(VMContext), Col),
- File,
ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
};
DILexicalBlock R(MDNode::get(VMContext, Elts));
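For review, a caller-side sketch of the two createGlobalVariable overloads after this change (DBuilder, File, IntTy and GV are illustrative stand-ins for values a frontend would already have):

  // New overload: the source name and the linkage (mangled) name can differ.
  DIGlobalVariable DG1 = DBuilder.createGlobalVariable(
      "counter", "_ZL7counter", File, /*LineNumber=*/42, IntTy,
      /*isLocalToUnit=*/true, GV);

  // Convenience overload: forwards Name as the linkage name, matching the
  // old single-name behavior.
  DIGlobalVariable DG2 = DBuilder.createGlobalVariable(
      "counter", File, /*LineNumber=*/42, IntTy,
      /*isLocalToUnit=*/true, GV);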
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index f09de3a731..ecd5216f20 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -438,6 +438,12 @@ DataLayout::~DataLayout() {
delete static_cast<StructLayoutMap*>(LayoutMap);
}
+bool DataLayout::doFinalization(Module &M) {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+ LayoutMap = 0;
+ return false;
+}
+
const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
@@ -504,47 +510,6 @@ std::string DataLayout::getStringRepresentation() const {
}
-uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- case Type::LabelTyID:
- return getPointerSizeInBits(0);
- case Type::PointerTyID: {
- unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace();
- return getPointerSizeInBits(AS);
- }
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
- }
- case Type::StructTyID:
- // Get the layout annotation... which is lazily created on demand.
- return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::IntegerTyID:
- return cast<IntegerType>(Ty)->getBitWidth();
- case Type::HalfTyID:
- return 16;
- case Type::FloatTyID:
- return 32;
- case Type::DoubleTyID:
- case Type::X86_MMXTyID:
- return 64;
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- return 128;
- // In memory objects this is always aligned to a higher boundary, but
- // only 80 bits contain information.
- case Type::X86_FP80TyID:
- return 80;
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType());
- }
- default:
- llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
- }
-}
-
/*!
\param abi_or_pref Flag that determines which alignment is returned. true
returns the ABI alignment, false returns the preferred alignment.
@@ -656,6 +621,13 @@ Type *DataLayout::getIntPtrType(Type *Ty) const {
return IntTy;
}
+Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const {
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ if (Width <= LegalIntWidths[i])
+ return Type::getIntNTy(C, LegalIntWidths[i]);
+ return 0;
+}
+
uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
ArrayRef<Value *> Indices) const {
Type *Ty = ptrTy;
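A behavioral sketch of the new getSmallestLegalIntType hook, assuming 2013-era header paths and a layout string whose n-spec lists native widths in ascending order:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  void demo() {
    LLVMContext Ctx;
    DataLayout DL("n8:16:32:64");                    // illustrative layout
    Type *T16 = DL.getSmallestLegalIntType(Ctx, 13); // -> i16
    Type *T64 = DL.getSmallestLegalIntType(Ctx, 33); // -> i64
    Type *T0  = DL.getSmallestLegalIntType(Ctx, 65); // -> 0, nothing legal
  }

Note the linear scan returns the first width that fits, so it only yields the smallest legal type if LegalIntWidths is kept sorted ascending.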
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index e85d4adf77..0ffe99d704 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::dwarf;
@@ -66,18 +67,28 @@ bool DIDescriptor::Verify() const {
DITemplateValueParameter(DbgNode).Verify());
}
-StringRef
-DIDescriptor::getStringField(unsigned Elt) const {
- if (DbgNode == 0)
- return StringRef();
+static Value *getField(const MDNode *DbgNode, unsigned Elt) {
+ if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
+ return 0;
+ return DbgNode->getOperand(Elt);
+}
- if (Elt < DbgNode->getNumOperands())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
- return MDS->getString();
+static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)))
+ return R;
+ return 0;
+}
+static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
+ if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
+ return MDS->getString();
return StringRef();
}
+StringRef DIDescriptor::getStringField(unsigned Elt) const {
+ return ::getStringField(DbgNode, Elt);
+}
+
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
if (DbgNode == 0)
return 0;
@@ -321,7 +332,7 @@ bool DIDescriptor::isEnumerator() const {
return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
}
-/// isObjCProperty - Return true if the specified tag is DW_TAG
+/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
bool DIDescriptor::isObjCProperty() const {
return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
}
@@ -407,7 +418,7 @@ bool DICompileUnit::Verify() const {
if (N.empty())
return false;
// It is possible that the directory and producer strings are empty.
- return DbgNode->getNumOperands() == 15;
+ return DbgNode->getNumOperands() == 12;
}
/// Verify - Verify that an ObjC property is well formed.
@@ -475,7 +486,7 @@ bool DISubprogram::Verify() const {
DICompositeType Ty = getType();
if (!Ty.Verify())
return false;
- return DbgNode->getNumOperands() == 21;
+ return DbgNode->getNumOperands() == 20;
}
/// Verify - Verify that a global variable descriptor is well formed.
@@ -529,9 +540,14 @@ bool DINameSpace::Verify() const {
return DbgNode->getNumOperands() == 5;
}
+/// \brief Retrieve the MDNode for the directory/file pair.
+MDNode *DIFile::getFileNode() const {
+ return const_cast<MDNode*>(getNodeField(DbgNode, 1));
+}
+
/// \brief Verify that the file descriptor is well formed.
bool DIFile::Verify() const {
- return isFile() && DbgNode->getNumOperands() == 4;
+ return isFile() && DbgNode->getNumOperands() == 2;
}
/// \brief Verify that the enumerator descriptor is well formed.
@@ -600,6 +616,25 @@ MDNode *DIDerivedType::getObjCProperty() const {
return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
}
+/// \brief Set the array of member DITypes.
+void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
+ assert((!TParams || DbgNode->getNumOperands() == 14) &&
+ "If you're setting the template parameters this should include a slot "
+ "for that!");
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(10, Elements);
+ if (TParams)
+ N->replaceOperandWith(13, TParams);
+ DbgNode = N;
+}
+
+/// \brief Set the containing type.
+void DICompositeType::setContainingType(DICompositeType ContainingType) {
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(12, ContainingType);
+ DbgNode = N;
+}
+
/// isInlinedFnArgument - Return true if this variable provides debugging
/// information for an inlined function's arguments.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
@@ -627,21 +662,21 @@ bool DISubprogram::describes(const Function *F) {
unsigned DISubprogram::isOptimized() const {
assert (DbgNode && "Invalid subprogram descriptor!");
- if (DbgNode->getNumOperands() == 16)
- return getUnsignedField(15);
+ if (DbgNode->getNumOperands() == 15)
+ return getUnsignedField(14);
return 0;
}
MDNode *DISubprogram::getVariablesNodes() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
return NULL;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(19));
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
}
DIArray DISubprogram::getVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
return DIArray();
- if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+ if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
return DIArray(T);
return DIArray();
}
@@ -649,76 +684,48 @@ DIArray DISubprogram::getVariables() const {
StringRef DIScope::getFilename() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlockFile())
- return DILexicalBlockFile(DbgNode).getFilename();
- if (isLexicalBlock())
- return DILexicalBlock(DbgNode).getFilename();
- if (isSubprogram())
- return DISubprogram(DbgNode).getFilename();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getFilename();
- if (isNameSpace())
- return DINameSpace(DbgNode).getFilename();
- if (isType())
- return DIType(DbgNode).getFilename();
- if (isFile())
- return DIFile(DbgNode).getFilename();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 0);
}
StringRef DIScope::getDirectory() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlockFile())
- return DILexicalBlockFile(DbgNode).getDirectory();
- if (isLexicalBlock())
- return DILexicalBlock(DbgNode).getDirectory();
- if (isSubprogram())
- return DISubprogram(DbgNode).getDirectory();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getDirectory();
- if (isNameSpace())
- return DINameSpace(DbgNode).getDirectory();
- if (isType())
- return DIType(DbgNode).getDirectory();
- if (isFile())
- return DIFile(DbgNode).getDirectory();
- llvm_unreachable("Invalid DIScope!");
+ return ::getStringField(getNodeField(DbgNode, 1), 1);
}
DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
return DIArray(N);
return DIArray();
}
DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
return DIArray();
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
return DIArray(N);
return DIArray();
}
@@ -1026,6 +1033,8 @@ void DIDescriptor::print(raw_ostream &OS) const {
DIVariable(DbgNode).printInternal(OS);
} else if (this->isObjCProperty()) {
DIObjCProperty(DbgNode).printInternal(OS);
+ } else if (this->isNameSpace()) {
+ DINameSpace(DbgNode).printInternal(OS);
} else if (this->isScope()) {
DIScope(DbgNode).printInternal(OS);
}
@@ -1099,6 +1108,14 @@ void DICompositeType::printInternal(raw_ostream &OS) const {
OS << " [" << A.getNumElements() << " elements]";
}
+void DINameSpace::printInternal(raw_ostream &OS) const {
+ StringRef Name = getName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
void DISubprogram::printInternal(raw_ostream &OS) const {
// TODO : Print context
OS << " [line " << getLineNumber() << ']';
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 5559a6c56e..1e72b90a13 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -211,7 +211,7 @@ Function::~Function() {
clearGC();
// Remove the intrinsicID from the Cache.
- if(getValueName() && isIntrinsic())
+ if (getValueName() && isIntrinsic())
getContext().pImpl->IntrinsicIDCache.erase(this);
}
@@ -352,7 +352,7 @@ unsigned Function::getIntrinsicID() const {
LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
getContext().pImpl->IntrinsicIDCache;
- if(!IntrinsicIDCache.count(this)) {
+ if (!IntrinsicIDCache.count(this)) {
unsigned Id = lookupIntrinsicID();
IntrinsicIDCache[this]=Id;
return Id;
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 2e3a525826..d58877ef77 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -3000,8 +3000,8 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
uint32_t BitWidth = C.getBitWidth();
switch (pred) {
default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
- case ICmpInst::ICMP_EQ: Upper++; break;
- case ICmpInst::ICMP_NE: Lower++; break;
+ case ICmpInst::ICMP_EQ: ++Upper; break;
+ case ICmpInst::ICMP_NE: ++Lower; break;
case ICmpInst::ICMP_ULT:
Lower = APInt::getMinValue(BitWidth);
// Check for an empty-set condition.
@@ -3015,25 +3015,25 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_UGT:
- Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
// Check for an empty-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_SGT:
- Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
// Check for an empty-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/false);
break;
case ICmpInst::ICMP_ULE:
- Lower = APInt::getMinValue(BitWidth); Upper++;
+ Lower = APInt::getMinValue(BitWidth); ++Upper;
// Check for a full-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/true);
break;
case ICmpInst::ICMP_SLE:
- Lower = APInt::getSignedMinValue(BitWidth); Upper++;
+ Lower = APInt::getSignedMinValue(BitWidth); ++Upper;
// Check for a full-set condition.
if (Lower == Upper)
return ConstantRange(BitWidth, /*isFullSet=*/true);
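A worked example of the range construction above (8-bit, unsigned greater-than), using the post-patch pre-increments; 2013-era headers for APInt, ConstantRange and Instructions are assumed:

  // x >u 250  ->  [251, 0), i.e. the wrapped set {251, ..., 255}
  // x >u 255  ->  ++Lower makes Lower == Upper, returned as the empty set
  ConstantRange R =
      ICmpInst::makeConstantRange(ICmpInst::ICMP_UGT, APInt(8, 250));
  bool In  = R.contains(APInt(8, 251));  // true
  bool Out = R.contains(APInt(8, 250));  // false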
diff --git a/lib/IRReader/CMakeLists.txt b/lib/IRReader/CMakeLists.txt
new file mode 100644
index 0000000000..cf10d8b7db
--- /dev/null
+++ b/lib/IRReader/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMIRReader
+ IRReader.cpp
+ )
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
new file mode 100644
index 0000000000..fb87cbd8bf
--- /dev/null
+++ b/lib/IRReader/IRReader.cpp
@@ -0,0 +1,78 @@
+//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+
+Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
+ if (M == 0) {
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer in the
+ // case of an error.
+ delete Buffer;
+ }
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return getLazyIRModule(File.take(), Err, Context);
+}
+
+Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+ if (M == 0)
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer.
+ delete Buffer;
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseIR(File.take(), Err, Context);
+}
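A minimal driver for the new library (header paths as of this tree; the input filename is illustrative):

  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IRReader/IRReader.h"
  #include "llvm/Support/SourceMgr.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    LLVMContext Context;
    SMDiagnostic Err;
    // Accepts bitcode or textual IR; the format is sniffed via isBitcode().
    Module *M = ParseIRFile("input.ll", Err, Context);
    if (!M) {
      Err.print("irreader-demo", errs());
      return 1;
    }
    outs() << "parsed module: " << M->getModuleIdentifier() << "\n";
    delete M;
    return 0;
  }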
diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt
new file mode 100644
index 0000000000..b7bc74d616
--- /dev/null
+++ b/lib/IRReader/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/IRReader/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IRReader
+parent = Libraries
+required_libraries = AsmParser BitReader Core Support
diff --git a/lib/IRReader/Makefile b/lib/IRReader/Makefile
new file mode 100644
index 0000000000..cf6bc11354
--- /dev/null
+++ b/lib/IRReader/Makefile
@@ -0,0 +1,14 @@
+##===- lib/IRReader/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME := LLVMIRReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index a31793c5fd..0565443806 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR MC Object Option Support TableGen Target Transforms
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader MC Object Option Support TableGen Target Transforms
[component_0]
type = Group
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 0acbcfadaf..74cbdadd61 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index c8ea8ff0a9..74d24f278b 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -15,7 +15,6 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
@@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname,
LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname,
Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -63,27 +60,9 @@ Linker::verbose(StringRef message) {
errs() << " " << message << "\n";
}
-void
-Linker::addPath(const sys::Path& path) {
- LibPaths.push_back(path);
-}
-
-void
-Linker::addPaths(const std::vector<std::string>& paths) {
- for (unsigned i = 0, e = paths.size(); i != e; ++i)
- LibPaths.push_back(sys::Path(paths[i]));
-}
-
-void
-Linker::addSystemPaths() {
- sys::Path::GetBitcodeLibraryPaths(LibPaths);
- LibPaths.insert(LibPaths.begin(),sys::Path("./"));
-}
-
Module*
Linker::releaseModule() {
Module* result = Composite;
- LibPaths.clear();
Error.clear();
Composite = 0;
Flags = 0;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 26d378e6c0..9adcc02b71 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -304,8 +304,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be changed to be not less than zero.
- std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
- std::vector<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
// Make space for this FileNumber in the MCDwarfFiles vector if needed.
if (FileNumber >= MCDwarfFiles.size()) {
MCDwarfFiles.resize(FileNumber + 1);
@@ -366,7 +366,7 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// is currently assigned and false otherwise.
bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
- std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
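The switch to SmallVectorImpl& works because it is the size-erased base of every SmallVector<T, N>: the map can own SmallVectors with a fixed inline size while clients bind references without naming N. A self-contained sketch:

  #include "llvm/ADT/SmallVector.h"
  using llvm::SmallVector;
  using llvm::SmallVectorImpl;

  static void addFile(SmallVectorImpl<const char *> &Files) {
    Files.push_back("a.s");             // interface is independent of N
  }

  void demo() {
    SmallVector<const char *, 4> Files; // inline size chosen by the owner
    addFile(Files);                     // binds as SmallVectorImpl<...>&
  }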
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index d3fa906a06..4766b37476 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
// Get the assembler info needed to setup the MCContext.
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
- assert(MAI && "Unable to create target asm info!");
+ if (!MAI)
+ return 0;
const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
- assert(MII && "Unable to create target instruction info!");
+ if (!MII)
+ return 0;
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
- assert(MRI && "Unable to create target register info!");
+ if (!MRI)
+ return 0;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
FeaturesStr);
- assert(STI && "Unable to create subtarget info!");
+ if (!STI)
+ return 0;
// Set up the MCContext for creating symbols and MCExpr's.
MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
- assert(Ctx && "Unable to create MCContext!");
+ if (!Ctx)
+ return 0;
// Set up disassembler.
MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
- assert(DisAsm && "Unable to create disassembler!");
+ if (!DisAsm)
+ return 0;
DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI, *MII, *MRI, *STI);
- assert(IP && "Unable to create instruction printer!");
+ if (!IP)
+ return 0;
LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
STI, MII, Ctx, DisAsm, IP);
- assert(DC && "Allocation failure!");
+ if (!DC)
+ return 0;
return DC;
}
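Caller-side consequence of the assert-to-early-return change: the C API can now report failure, so results should be checked (triple and CPU strings are illustrative):

  #include "llvm-c/Disassembler.h"

  void demo() {
    LLVMDisasmContextRef DC = LLVMCreateDisasmCPU(
        "x86_64-unknown-linux-gnu", "generic",
        /*DisInfo=*/0, /*TagType=*/0, /*GetOpInfo=*/0, /*SymbolLookUp=*/0);
    if (!DC) {
      // Target, subtarget or printer unavailable; previously an assert.
      return;
    }
    LLVMDisasmDispose(DC);
  }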
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index c81abe952a..0f8f0741bd 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -298,7 +298,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
// Put out the directory and file tables.
// First the directory table.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs(CUID);
for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
@@ -307,7 +307,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
MCOS->EmitIntValue(0, 1); // Terminate the directory list
// Second the file table.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles(CUID);
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
@@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
MCOS->EmitBytes(MCDwarfDirs[0]);
MCOS->EmitBytes("/");
}
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
MCOS->EmitBytes(MCDwarfFiles[1]->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 1a53934fef..cd4d144575 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -194,7 +194,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_TPOFF: return "TPOFF";
case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
- case VK_SECREL: return "SECREL";
+ case VK_SECREL: return "SECREL32";
case VK_ARM_NONE: return "(NONE)";
case VK_ARM_PLT: return "(PLT)";
case VK_ARM_GOT: return "(GOT)";
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 2e1a045569..0d32ad40e8 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -223,6 +223,12 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
}
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ if (T.getArch() != Triple::mips &&
+ T.getArch() != Triple::mipsel &&
+ T.getArch() != Triple::mips64 &&
+ T.getArch() != Triple::mips64el)
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
@@ -230,15 +236,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_)
+ FDEEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
@@ -261,8 +265,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
} else if (T.getArch() == Triple::aarch64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -282,7 +284,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
} else if (T.getArch() == Triple::ppc64) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 6ab49ec92c..804734cea9 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return TokError("unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
@@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// If we previously parsed a cpp hash file line comment, then make sure the
// current Dwarf File is for the CppHashFilename; if not, emit the Dwarf
// File table entry for it and adjust the line number for the .loc.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
if (CppHashFilename.size() != 0) {
if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
@@ -4105,12 +4105,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
// Immediate.
- if (Operand->isImm()) {
- if (Operand->needAsmRewrite())
- AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix,
- Operand->getStartLoc()));
+ if (Operand->isImm())
continue;
- }
// Register operand.
if (Operand->isReg() && !Operand->needAddressOf()) {
@@ -4131,11 +4127,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
continue;
bool isOutput = (i == 1) && Desc.mayStore();
- if (Operand->isMem() && Operand->needSizeDirective())
- AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
- Operand->getStartLoc(), /*Len*/0,
- Operand->getMemSize()));
-
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
@@ -4184,28 +4175,31 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
// Build the IR assembly string.
std::string AsmStringIR;
- AsmRewriteKind PrevKind = AOK_Imm;
raw_string_ostream OS(AsmStringIR);
- const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
E = AsmStrRewrites.end();
I != E; ++I) {
const char *Loc = (*I).Loc.getPointer();
- assert(Loc >= Start && "Expected Loc to be after Start!");
+ assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
unsigned AdditionalSkip = 0;
AsmRewriteKind Kind = (*I).Kind;
- // Emit everything up to the immediate/expression. If the previous rewrite
- // was a size directive, then this has already been done.
- if (PrevKind != AOK_SizeDirective)
- OS << StringRef(Start, Loc - Start);
- PrevKind = Kind;
+ // Emit everything up to the immediate/expression.
+ unsigned Len = Loc - AsmStart;
+ if (Len) {
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
+ --Len;
+ OS << StringRef(AsmStart, Len);
+ }
// Skip the original expression.
if (Kind == AOK_Skip) {
- Start = Loc + (*I).Len;
+ AsmStart = Loc + (*I).Len;
continue;
}
@@ -4254,14 +4248,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
}
// Skip the original expression.
- if (Kind != AOK_SizeDirective)
- Start = Loc + (*I).Len + AdditionalSkip;
+ AsmStart = Loc + (*I).Len + AdditionalSkip;
+
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
+ *AsmStart == ']')
+ ++AsmStart;
}
// Emit the remainder of the asm string.
- const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
- if (Start != AsmEnd)
- OS << StringRef(Start, AsmEnd - Start);
+ if (AsmStart != AsmEnd)
+ OS << StringRef(AsmStart, AsmEnd - AsmStart);
AsmString = OS.str();
return false;
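A standalone model of the rewrite walk above, stripping one '[' before and one ']' after input/output operands and skipping each original expression (simplified: no AOK_Skip or AdditionalSkip handling; rewrites assumed sorted by location):

  #include <cstddef>
  #include <string>

  struct Rewrite { std::size_t Loc, Len; bool IsInOut; };

  static std::string applyRewrites(const std::string &Src,
                                   const Rewrite *R, std::size_t N) {
    std::string Out;
    std::size_t Start = 0;
    for (std::size_t i = 0; i != N; ++i) {
      std::size_t Len = R[i].Loc - Start;
      if (R[i].IsInOut && Len && Src[R[i].Loc - 1] == '[')
        --Len;                         // drop the opening bracket
      Out.append(Src, Start, Len);
      Out += '$';                      // stand-in for the emitted operand
      Start = R[i].Loc + R[i].Len;     // skip the original expression
      if (R[i].IsInOut && Start < Src.size() && Src[Start] == ']')
        ++Start;                       // drop the closing bracket
    }
    Out.append(Src, Start, std::string::npos);
    return Out;
  }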
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 51ef415542..d02e5535bd 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
: Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.push_back(std::make_pair(section, section));
}
@@ -40,7 +40,7 @@ void MCStreamer::reset() {
EmitDebugFrame = false;
CurrentW64UnwindInfo = 0;
LastSymbol = 0;
- const MCSection *section = NULL;
+ const MCSection *section = 0;
SectionStack.clear();
SectionStack.push_back(std::make_pair(section, section));
}
@@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
if (FrameInfos.empty())
- return NULL;
+ return 0;
return &FrameInfos.back();
}
@@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -623,5 +623,5 @@ void MCStreamer::Finish() {
MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
report_fatal_error("Not supported!");
- return *(static_cast<MCSymbolData*> (NULL));
+ return *(static_cast<MCSymbolData*>(0));
}
diff --git a/lib/Makefile b/lib/Makefile
index 043eda6b99..57f016bc89 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,8 @@ LEVEL = ..
include $(LEVEL)/Makefile.config
PARALLEL_DIRS := IR AsmParser Bitcode Archive Analysis Transforms CodeGen \
- Target ExecutionEngine Linker MC Object Option DebugInfo
+ Target ExecutionEngine Linker MC Object Option DebugInfo \
+ IRReader
include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 5b68fbb270..6182e34150 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3311,10 +3311,8 @@ namespace {
significand = significand.udiv(divisor);
- // Truncate the significand down to its active bit count, but
- // don't try to drop below 32.
- unsigned newPrecision = std::max(32U, significand.getActiveBits());
- significand = significand.trunc(newPrecision);
+ // Truncate the significand down to its active bit count.
+ significand = significand.trunc(significand.getActiveBits());
}
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 07cb057b48..e8534753b4 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const {
if (lhsNeg) {
// Sign bit is set so perform two's complement to make it positive
lhs.flipAllBits();
- lhs++;
+ ++lhs;
}
if (rhsNeg) {
// Sign bit is set so perform two's complement to make it positive
rhs.flipAllBits();
- rhs++;
+ ++rhs;
}
// Now we have unsigned values to compare so do the comparison if necessary
@@ -2116,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
// If its negative, put it in two's complement form
if (isNeg) {
- (*this)--;
+ --(*this);
this->flipAllBits();
}
}
@@ -2197,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// Flip the bits and add one to turn it into the equivalent positive
// value and put a '-' in the result.
Tmp.flipAllBits();
- Tmp++;
+ ++Tmp;
Str.push_back('-');
}
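The pre-increment preference above is not just style for APInt: the postfix form must return the old value, which for a wide integer means constructing and discarding a BitWidth-sized copy.

  #include "llvm/ADT/APInt.h"

  void bump(llvm::APInt &X) {
    ++X;   // in-place, no temporary
    // X++ would materialize a full-width temporary just to throw it away.
  }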
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 5ba69fc3c8..3746a81011 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -83,6 +83,7 @@ add_llvm_library(LLVMSupport
Threading.cpp
TimeValue.cpp
Valgrind.cpp
+ Watchdog.cpp
Unix/Host.inc
Unix/Memory.inc
Unix/Mutex.inc
@@ -95,6 +96,7 @@ add_llvm_library(LLVMSupport
Unix/system_error.inc
Unix/ThreadLocal.inc
Unix/TimeValue.inc
+ Unix/Watchdog.inc
Windows/DynamicLibrary.inc
Windows/Host.inc
Windows/Memory.inc
@@ -108,4 +110,5 @@ add_llvm_library(LLVMSupport
Windows/system_error.inc
Windows/ThreadLocal.inc
Windows/TimeValue.inc
+ Windows/Watchdog.inc
)
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index d4382e54e0..f4b591e777 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -49,21 +49,21 @@ void llvm::remove_fatal_error_handler() {
ErrorHandler = 0;
}
-void llvm::report_fatal_error(const char *Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const std::string &Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(StringRef Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const Twine &Reason) {
+void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
if (ErrorHandler) {
- ErrorHandler(ErrorHandlerUserData, Reason.str());
+ ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag);
} else {
// Blast the result out to stderr. We don't try hard to make sure this
// succeeds (e.g. handling EINTR) and we can't use errs() here because
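A handler-side sketch of the new GenCrashDiag flag, using the signature as changed in this patch:

  #include "llvm/Support/ErrorHandling.h"
  #include <string>

  static void MyHandler(void *UserData, const std::string &Reason,
                        bool GenCrashDiag) {
    // GenCrashDiag == false asks the handler to skip crash diagnostics,
    // e.g. for expected, user-facing failures.
  }

  void installDemo() {
    llvm::install_fatal_error_handler(MyHandler, /*user_data=*/0);
    llvm::report_fatal_error("demo failure", /*GenCrashDiag=*/false);
  }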
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index cd430f218b..1ee69b6023 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath,
if (EC)
return EC;
- OwningPtr<mapped_file_region> MappedFile(
- new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC));
+ OwningPtr<mapped_file_region> MappedFile(new mapped_file_region(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC));
if (EC)
return EC;
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4c558b37cf..7c5ab96a76 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -72,13 +72,17 @@ static void CopyStringRef(char *Memory, StringRef Data) {
Memory[Data.size()] = 0; // Null terminate string.
}
-/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
-template <typename T>
-static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
- bool RequiresNullTerminator) {
- char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
- CopyStringRef(Mem + sizeof(T), Name);
- return new (Mem) T(Buffer, RequiresNullTerminator);
+namespace {
+struct NamedBufferAlloc {
+ StringRef Name;
+ NamedBufferAlloc(StringRef Name) : Name(Name) {}
+};
+}
+
+void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
+ char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
+ CopyStringRef(Mem + N, Alloc.Name);
+ return Mem;
}
namespace {
@@ -105,8 +109,8 @@ public:
MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
StringRef BufferName,
bool RequiresNullTerminator) {
- return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
- RequiresNullTerminator);
+ return new (NamedBufferAlloc(BufferName))
+ MemoryBufferMem(InputData, RequiresNullTerminator);
}
/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
@@ -183,24 +187,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
//===----------------------------------------------------------------------===//
namespace {
-/// MemoryBufferMMapFile - This represents a file that was mapped in with the
-/// sys::Path::MapInFilePages method. When destroyed, it calls the
-/// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBufferMem {
-public:
- MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
- : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+/// \brief Memory-maps a file descriptor using sys::fs::mapped_file_region.
+///
+/// This handles converting the offset into a legal offset on the platform.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ sys::fs::mapped_file_region MFR;
+
+ static uint64_t getLegalMapOffset(uint64_t Offset) {
+ return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
+ }
+
+ static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
+ return Len + (Offset - getLegalMapOffset(Offset));
+ }
- ~MemoryBufferMMapFile() {
- static int PageSize = sys::process::get_self()->page_size();
+ const char *getStart(uint64_t Len, uint64_t Offset) {
+ return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
+ }
- uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
- size_t Size = getBufferSize();
- uintptr_t RealStart = Start & ~(PageSize - 1);
- size_t RealSize = Size + (Start - RealStart);
+public:
+ MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ uint64_t Offset, error_code EC)
+ : MFR(FD, false, sys::fs::mapped_file_region::readonly,
+ getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
+ if (!EC) {
+ const char *Start = getStart(Len, Offset);
+ init(Start, Start + Len, RequiresNullTerminator);
+ }
+ }
- sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
- RealSize);
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char *>(this + 1);
}
virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
@@ -344,17 +362,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize)) {
- off_t RealMapOffset = Offset & ~(PageSize - 1);
- off_t Delta = Offset - RealMapOffset;
- size_t RealMapSize = MapSize + Delta;
-
- if (const char *Pages = sys::Path::MapInFilePages(FD,
- RealMapSize,
- RealMapOffset)) {
- result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
- StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ error_code EC;
+ result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
+ if (!EC)
return error_code::success();
- }
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
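A standalone model of the NamedBufferAlloc idiom introduced above: over-allocate so the identifier lives directly behind the object, recoverable via this + 1. (Sketch only; a production version would pair this with a matching placement delete for the constructor-throws path.)

  #include <cstddef>
  #include <cstring>
  #include <new>

  struct NamedAlloc {
    const char *Name;
    explicit NamedAlloc(const char *N) : Name(N) {}
  };

  void *operator new(std::size_t Size, const NamedAlloc &A) {
    std::size_t Len = std::strlen(A.Name) + 1;
    char *Mem = static_cast<char *>(::operator new(Size + Len));
    std::memcpy(Mem + Size, A.Name, Len);  // name (with NUL) after object
    return Mem;
  }

  struct Buffer {
    const char *id() const {
      return reinterpret_cast<const char *>(this + 1);
    }
  };

  // Usage: Buffer *B = new (NamedAlloc("file.ll")) Buffer;
  //        B->id() now yields "file.ll".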
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 41add96194..58a6ea720e 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -18,6 +18,9 @@
#include <cctype>
#include <cstdio>
#include <cstring>
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
namespace {
using llvm::StringRef;
@@ -493,6 +496,27 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
+#ifdef __APPLE__
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+ int ConfName = erasedOnReboot ? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, 0, 0);
+ if (ConfLen > 0) {
+ do {
+ result.resize(ConfLen);
+ ConfLen = confstr(ConfName, result.data(), result.size());
+ } while (ConfLen > 0 && ConfLen != result.size());
+
+ if (ConfLen > 0) {
+ assert(result.back() == 0);
+ result.pop_back();
+ return;
+ }
+
+ result.clear();
+ }
+#endif
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 21d56adb5e..23ee5ab105 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -17,6 +17,7 @@
#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
+#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
#ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -37,7 +38,10 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
if (Entry->getNextEntry())
NextID = PrintStack(Entry->getNextEntry(), OS);
OS << NextID << ".\t";
- Entry->print(OS);
+ {
+ sys::Watchdog W(5);
+ Entry->print(OS);
+ }
return NextID+1;
}
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 75bc282d9b..201d5c0d30 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -29,12 +29,15 @@ Program::ExecuteAndWait(const Path& path,
const Path** redirects,
unsigned secondsToWait,
unsigned memoryLimit,
- std::string* ErrMsg) {
+ std::string* ErrMsg,
+ bool *ExecutionFailed) {
Program prg;
- if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) {
+ if (ExecutionFailed) *ExecutionFailed = false;
return prg.Wait(path, secondsToWait, ErrMsg);
- else
- return -1;
+ }
+ if (ExecutionFailed) *ExecutionFailed = true;
+ return -1;
}
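Caller-side sketch of the new out-parameter: a bare -1 was ambiguous between "could not launch" and "ran and failed"; now the two are distinguishable (path, args and ErrMsg are assumed to be set up as usual):

  bool ExecFailed;
  int RC = sys::Program::ExecuteAndWait(path, args, /*envp=*/0,
                                        /*redirects=*/0,
                                        /*secondsToWait=*/0,
                                        /*memoryLimit=*/0, &ErrMsg,
                                        &ExecFailed);
  if (ExecFailed) {
    // The child was never started; ErrMsg says why.
  }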
void
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 3b53e9ff49..f0fed7792c 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -29,13 +29,9 @@ void SmallPtrSetImpl::shrink_and_clear() {
NumElements = NumTombstones = 0;
// Install the new array. Clear all the buckets to empty.
- CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
assert(CurArray && "Failed to allocate memory?");
memset(CurArray, -1, CurArraySize*sizeof(void*));
-
- // The end pointer, always valid, is set to a valid element to help the
- // iterator.
- CurArray[CurArraySize] = 0;
}
bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
@@ -139,15 +135,11 @@ void SmallPtrSetImpl::Grow(unsigned NewSize) {
bool WasSmall = isSmall();
// Install the new array. Clear all the buckets to empty.
- CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * NewSize);
assert(CurArray && "Failed to allocate memory?");
CurArraySize = NewSize;
memset(CurArray, -1, NewSize*sizeof(void*));
- // The end pointer, always valid, is set to a valid element to help the
- // iterator.
- CurArray[NewSize] = 0;
-
// Copy over all the elements.
if (WasSmall) {
// Small sets store their elements in order.
@@ -180,7 +172,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
CurArray = SmallArray;
// Otherwise, allocate new heap space (unless we were the same size)
} else {
- CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
assert(CurArray && "Failed to allocate memory?");
}
@@ -188,7 +180,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
CurArraySize = that.CurArraySize;
// Copy over the contents from the other set
- memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+ memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize);
NumElements = that.NumElements;
NumTombstones = that.NumTombstones;
@@ -200,7 +192,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
if (isSmall() && RHS.isSmall())
assert(CurArraySize == RHS.CurArraySize &&
"Cannot assign sets with different small sizes");
-
+
// If we're becoming small, prepare to insert into our stack space
if (RHS.isSmall()) {
if (!isSmall())
@@ -209,9 +201,9 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
// Otherwise, allocate new heap space (unless we were the same size)
} else if (CurArraySize != RHS.CurArraySize) {
if (isSmall())
- CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
else
- CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize);
assert(CurArray && "Failed to allocate memory?");
}
@@ -219,7 +211,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
CurArraySize = RHS.CurArraySize;
// Copy over the contents from the other set
- memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*CurArraySize);
NumElements = RHS.NumElements;
NumTombstones = RHS.NumTombstones;
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index e00394ec6a..e9b26bdb80 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr,
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
+# if defined(ANDROID)
+ // The declaration of "cacheflush" in Android bionic:
+ // extern int cacheflush(long start, long end, long flags);
+ const char *End = Start + Len;
+ long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
+ long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
+ cacheflush(LStart, LEnd, BCACHE);
+# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
+# endif
# endif
#endif // end apple
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 44b31b3202..a3dfd4b0a3 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -475,12 +475,14 @@ rety_open_create:
return error_code::success();
}
-error_code mapped_file_region::init(int fd, uint64_t offset) {
- AutoFD FD(fd);
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ AutoFD ScopedFD(FD);
+ if (!CloseFD)
+ ScopedFD.take();
// Figure out how large the file is.
struct stat FileInfo;
- if (fstat(fd, &FileInfo) == -1)
+ if (fstat(FD, &FileInfo) == -1)
return error_code(errno, system_category());
uint64_t FileSize = FileInfo.st_size;
@@ -488,7 +490,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
Size = FileSize;
else if (FileSize < Size) {
// We need to grow the file.
- if (ftruncate(fd, Size) == -1)
+ if (ftruncate(FD, Size) == -1)
return error_code(errno, system_category());
}
@@ -497,7 +499,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+ Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
@@ -526,12 +528,13 @@ mapped_file_region::mapped_file_region(const Twine &path,
return;
}
- ec = init(ofd, offset);
+ ec = init(ofd, true, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -545,7 +548,7 @@ mapped_file_region::mapped_file_region(int fd,
return;
}
- ec = init(fd, offset);
+ ec = init(fd, closefd, offset);
if (ec)
Mapping = 0;
}
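Ownership sketch for the new CloseFD flag: with CloseFD == false the mapping borrows the descriptor and the caller remains responsible for closing it (FD, Size and Offset are assumed):

  error_code EC;
  sys::fs::mapped_file_region MFR(FD, /*CloseFD=*/false,
                                  sys::fs::mapped_file_region::readonly,
                                  Size, Offset, EC);
  if (!EC) {
    const char *Data = MFR.const_data();  // valid while MFR is alive
  }
  // FD is still open here; close it when done.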
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
new file mode 100644
index 0000000000..5d89c0e51b
--- /dev/null
+++ b/lib/Support/Unix/Watchdog.inc
@@ -0,0 +1,32 @@
+//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {
+#ifdef HAVE_UNISTD_H
+ alarm(seconds);
+#endif
+ }
+
+ Watchdog::~Watchdog() {
+#ifdef HAVE_UNISTD_H
+ alarm(0);
+#endif
+ }
+ }
+}
diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp
new file mode 100644
index 0000000000..724aa001f1
--- /dev/null
+++ b/lib/Support/Watchdog.cpp
@@ -0,0 +1,23 @@
+//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Watchdog.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Watchdog.inc"
+#endif
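Usage sketch for the new RAII Watchdog, as wired into PrettyStackTrace above: arm a deadline around possibly-hanging work, disarm on scope exit.

  #include "llvm/Support/Watchdog.h"

  void guardedPrint() {
    llvm::sys::Watchdog W(5);  // Unix: alarm(5); SIGALRM fires if we hang
    // ... work that must finish within ~5 seconds ...
  }                            // ~Watchdog runs alarm(0) to disarm

  // Note: the Unix implementation shares the single process-wide alarm(2)
  // timer, so nested watchdogs clobber the outer deadline.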
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 2e6cc96e7f..23f3d14f91 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -593,6 +593,10 @@ retry_random_path:
random_path_utf16.push_back(0);
random_path_utf16.pop_back();
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to create + open the path.
retry_create_file:
HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
@@ -610,7 +614,9 @@ retry_create_file:
if (ec == windows_error::file_exists)
goto retry_random_path;
// Check for non-existing parent directories.
- if (ec == windows_error::path_not_found) {
+ if (ec == windows_error::path_not_found && !TriedToCreateParent) {
+ TriedToCreateParent = true;
+
// Create the directories using result_path as temp storage.
if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
random_path_utf16.size(), result_path))
@@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
-error_code mapped_file_region::init(int FD, uint64_t Offset) {
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return make_error_code(errc::invalid_argument);
}
@@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
if (Mapping == NULL) {
error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
Size = mbi.RegionSize;
}
+
+  // Close all the handles except for the view; the mapped view keeps the
+  // underlying file and mapping alive.
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor); // Also closes FileHandle.
+ } else
+ ::CloseHandle(FileHandle);
return error_code::success();
}
@@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
FileDescriptor = 0;
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, true, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd,
, FileMappingHandle() {
FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
if (FileHandle == INVALID_HANDLE_VALUE) {
- _close(FileDescriptor);
+ if (closefd)
+ _close(FileDescriptor);
FileDescriptor = 0;
ec = make_error_code(errc::bad_file_descriptor);
return;
}
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, closefd, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd,
mapped_file_region::~mapped_file_region() {
if (Mapping)
::UnmapViewOfFile(Mapping);
- if (FileMappingHandle)
- ::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else if (FileHandle != INVALID_HANDLE_VALUE)
- ::CloseHandle(FileHandle);
}
#if LLVM_HAS_RVALUE_REFERENCES
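Taken together, the new closefd flag moves descriptor ownership into the caller's hands. A minimal usage sketch against the constructor signature above (illustrative only; the headers and size values are assumptions, and error handling is elided):

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/system_error.h"

    void inspectMapping(int FD) {
      llvm::error_code EC;
      // closefd=false: the region maps the file, but the caller keeps
      // ownership of FD and must close it after the region is destroyed.
      llvm::sys::fs::mapped_file_region
          Region(FD, /*closefd=*/false,
                 llvm::sys::fs::mapped_file_region::readonly,
                 /*length=*/4096, /*offset=*/0, EC);
      if (EC)
        return;
      const char *Data = Region.const_data(); // read-only view of the file
      (void)Data;
    } // The view is unmapped here; FD itself is still open for the caller.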
diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc
new file mode 100644
index 0000000000..fab2bdf2a9
--- /dev/null
+++ b/lib/Support/Windows/Watchdog.inc
@@ -0,0 +1,24 @@
+//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: implement.
+// Currently this is only used by PrettyStackTrace, which is also unimplemented
+// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer
+// and a second thread to run the TimerAPCProc.
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {}
+ Watchdog::~Watchdog() {}
+ }
+}
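For reference, one hedged sketch of the approach the TODO describes (untested, and only one of several possible shapes): a dedicated thread arms a waitable timer with an APC and parks in an alertable wait, since the APC fires on the thread that called SetWaitableTimer:

    #include <windows.h>

    static void CALLBACK TimerAPCProc(LPVOID, DWORD, DWORD) {
      // Placeholder deadline action; a real implementation would mirror
      // whatever the Unix SIGALRM default amounts to.
      ::TerminateProcess(::GetCurrentProcess(), 1);
    }

    static DWORD WINAPI WatchdogThreadProc(LPVOID Param) {
      DWORD Seconds = static_cast<DWORD>(reinterpret_cast<UINT_PTR>(Param));
      HANDLE Timer = ::CreateWaitableTimer(NULL, TRUE, NULL);
      LARGE_INTEGER Due;
      Due.QuadPart = -static_cast<LONGLONG>(Seconds) * 10000000LL; // 100ns units
      ::SetWaitableTimer(Timer, &Due, 0, TimerAPCProc, NULL, FALSE);
      ::SleepEx(INFINITE, TRUE); // alertable wait: the APC runs here
      ::CloseHandle(Timer);
      return 0;
    }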
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f71abd3b24..a433088b19 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
- copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ size_t BytesRemaining = Size - BytesToWrite;
+ if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
+ // Too much left over to copy into our buffer.
+ return write(Ptr + BytesToWrite, BytesRemaining);
+ }
+ copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
@@ -512,7 +517,7 @@ raw_fd_ostream::~raw_fd_ostream() {
// has_error() and clear the error flag with clear_error() before
// destructing raw_ostream objects which may have errors.
if (has_error())
- report_fatal_error("IO failure on output stream.");
+ report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false);
}
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index ec84a72454..928b1203cd 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -20,9 +20,15 @@
namespace llvm {
SourceMgr SrcMgr;
+unsigned ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
const Twine &Msg) {
+ // Count the total number of errors printed.
+ // This is used to exit with an error code if there were any errors.
+ if (Kind == SourceMgr::DK_Error)
+ ++ErrorsPrinted;
+
SMLoc NullLoc;
if (Loc.empty())
Loc = NullLoc;
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index e1cd623783..dc4167b305 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
if (MainFn(Out.os(), Records))
return 1;
+ if (ErrorsPrinted > 0) {
+ errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
+ return 1;
+ }
+
// Declare success.
Out.keep();
return 0;
-
- return 1;
}
}
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index c4b48fe5e8..86ad2a6e3c 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -1547,29 +1547,39 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
/// ParseDagArgList - Parse the argument list for a dag literal expression.
///
-/// ParseDagArgList ::= Value (':' VARNAME)?
-/// ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)?
+/// DagArg ::= Value (':' VARNAME)?
+/// DagArg ::= VARNAME
+/// DagArgList ::= DagArg
+/// DagArgList ::= DagArgList ',' DagArg
std::vector<std::pair<llvm::Init*, std::string> >
TGParser::ParseDagArgList(Record *CurRec) {
std::vector<std::pair<llvm::Init*, std::string> > Result;
while (1) {
- Init *Val = ParseValue(CurRec);
- if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >();
-
- // If the variable name is present, add it.
- std::string VarName;
- if (Lex.getCode() == tgtok::colon) {
- if (Lex.Lex() != tgtok::VarName) { // eat the ':'
- TokError("expected variable name in dag literal");
+ // DagArg ::= VARNAME
+ if (Lex.getCode() == tgtok::VarName) {
+ // A missing value is treated like '?'.
+ Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal()));
+ Lex.Lex();
+ } else {
+ // DagArg ::= Value (':' VARNAME)?
+ Init *Val = ParseValue(CurRec);
+ if (Val == 0)
return std::vector<std::pair<llvm::Init*, std::string> >();
- }
- VarName = Lex.getCurStrVal();
- Lex.Lex(); // eat the VarName.
- }
- Result.push_back(std::make_pair(Val, VarName));
+ // If the variable name is present, add it.
+ std::string VarName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag literal");
+ return std::vector<std::pair<llvm::Init*, std::string> >();
+ }
+ VarName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+ Result.push_back(std::make_pair(Val, VarName));
+ }
if (Lex.getCode() != tgtok::comma) break;
Lex.Lex(); // eat the ','
}
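Concretely (an illustration, not from the commit message): a dag literal such as (ops $lhs, i32:$rhs) now parses, with the bare $lhs recorded against UnsetInit; in other words, it behaves as if it had been written ?:$lhs.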
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12e16..dc41f2f605 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
return TopOfFrameOffset - FrameRegPos;
}
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- // FIXME: Make generic? Really consider after upstreaming. This code is now
- // shared between PEI, ARM *and* here.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-save register for this purpose or allocate an extra spill slot.
bool BigStack =
- (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
@@ -449,7 +396,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// helpfully places it near either SP or FP for us to avoid
  // infinite regression during scavenging.
const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7b93463244..cf3a2c3707 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -618,11 +618,11 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB,
int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
if (NumBytes == 0 && DstReg == SrcReg)
return;
- else if (abs(NumBytes) & ~0xffffff) {
+ else if (abs64(NumBytes) & ~0xffffff) {
// Generically, we have to materialize the offset into a temporary register
// and subtract it. There are a couple of ways this could be done, for now
// we'll use a movz/movk or movn/movk sequence.
- uint64_t Bits = static_cast<uint64_t>(abs(NumBytes));
+ uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
.addImm(0xffff & Bits).addImm(0)
.setMIFlags(MIFlags);
@@ -673,7 +673,7 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB,
} else {
LowOp = AArch64::SUBxxi_lsl0_s;
HighOp = AArch64::SUBxxi_lsl12_s;
- NumBytes = abs(NumBytes);
+ NumBytes = abs64(NumBytes);
}
// If we're here, at the very least a move needs to be produced, which just
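The abs to abs64 switch matters because NumBytes is an int64_t: plain abs() takes an int, so the argument's high bits are dropped before the absolute value is taken. A hedged illustration (not from the patch):

    #include <stdint.h>
    #include <stdlib.h>

    int64_t NumBytes = (int64_t)1 << 33; // an adjustment wider than 32 bits
    // abs() narrows its argument to int, so the "magnitude" computed here is 0.
    int Wrong = abs((int)NumBytes);
    // LLVM's 64-bit helper preserves the full width:
    //   uint64_t Right = abs64(NumBytes); // == (uint64_t)1 << 33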
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695dacb4..69bb80a485 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
SMLoc StartLoc, EndLoc;
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
union {
- struct {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
- } ImmWithLSL;
-
- struct {
- A64CC::CondCodes Code;
- } CondCode;
-
- struct {
- double Val;
- } FPImm;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
- } ShiftExtend;
-
- struct {
- const char *Data;
- unsigned Length;
- } SysReg;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
};
AArch64Operand(KindTy K, SMLoc S, SMLoc E)
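The union's contents are unchanged; the anonymous struct types simply gain names. A generic sketch of what the naming buys (hypothetical types, not the parser's real ones): the member types become nameable outside the union, so they can be declared, passed, and reused independently:

    struct RegOp { unsigned RegNum; };
    struct ImmOp { const void *Val; };

    struct Operand {
      enum KindTy { Register, Immediate } Kind;
      union {
        RegOp Reg; // named types, usable on their own elsewhere
        ImmOp Imm;
      };
    };

    Operand makeReg(unsigned R) {
      Operand Op;
      Op.Kind = Operand::Register;
      Op.Reg.RegNum = R;
      return Op;
    }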
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index ab9bba1836..c6690a96c7 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -194,7 +194,17 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
{"rvbar_el3", RVBAR_EL3},
{"isr_el1", ISR_EL1},
{"cntpct_el0", CNTPCT_EL0},
- {"cntvct_el0", CNTVCT_EL0}
+ {"cntvct_el0", CNTVCT_EL0},
+
+ // GICv3 registers
+ {"icc_iar1_el1", ICC_IAR1_EL1},
+ {"icc_iar0_el1", ICC_IAR0_EL1},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1},
+ {"icc_rpr_el1", ICC_RPR_EL1},
+ {"ich_vtr_el2", ICH_VTR_EL2},
+ {"ich_eisr_el2", ICH_EISR_EL2},
+ {"ich_elsr_el2", ICH_ELSR_EL2}
};
A64SysReg::MRSMapper::MRSMapper() {
@@ -205,7 +215,15 @@ A64SysReg::MRSMapper::MRSMapper() {
const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
{"dbgdtrtx_el0", DBGDTRTX_EL0},
{"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0}
+ {"pmswinc_el0", PMSWINC_EL0},
+
+ // GICv3 registers
+ {"icc_eoir1_el1", ICC_EOIR1_EL1},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1},
+ {"icc_dir_el1", ICC_DIR_EL1},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1}
};
A64SysReg::MSRMapper::MSRMapper() {
@@ -467,6 +485,56 @@ const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
{"pmevtyper28_el0", PMEVTYPER28_EL0},
{"pmevtyper29_el0", PMEVTYPER29_EL0},
{"pmevtyper30_el0", PMEVTYPER30_EL0},
+
+ // GICv3 registers
+ {"icc_bpr1_el1", ICC_BPR1_EL1},
+ {"icc_bpr0_el1", ICC_BPR0_EL1},
+ {"icc_pmr_el1", ICC_PMR_EL1},
+ {"icc_ctlr_el1", ICC_CTLR_EL1},
+ {"icc_ctlr_el3", ICC_CTLR_EL3},
+ {"icc_sre_el1", ICC_SRE_EL1},
+ {"icc_sre_el2", ICC_SRE_EL2},
+ {"icc_sre_el3", ICC_SRE_EL3},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
+ {"icc_seien_el1", ICC_SEIEN_EL1},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2},
+ {"ich_hcr_el2", ICH_HCR_EL2},
+ {"ich_misr_el2", ICH_MISR_EL2},
+ {"ich_vmcr_el2", ICH_VMCR_EL2},
+ {"ich_vseir_el2", ICH_VSEIR_EL2},
+ {"ich_lr0_el2", ICH_LR0_EL2},
+ {"ich_lr1_el2", ICH_LR1_EL2},
+ {"ich_lr2_el2", ICH_LR2_EL2},
+ {"ich_lr3_el2", ICH_LR3_EL2},
+ {"ich_lr4_el2", ICH_LR4_EL2},
+ {"ich_lr5_el2", ICH_LR5_EL2},
+ {"ich_lr6_el2", ICH_LR6_EL2},
+ {"ich_lr7_el2", ICH_LR7_EL2},
+ {"ich_lr8_el2", ICH_LR8_EL2},
+ {"ich_lr9_el2", ICH_LR9_EL2},
+ {"ich_lr10_el2", ICH_LR10_EL2},
+ {"ich_lr11_el2", ICH_LR11_EL2},
+ {"ich_lr12_el2", ICH_LR12_EL2},
+ {"ich_lr13_el2", ICH_LR13_EL2},
+ {"ich_lr14_el2", ICH_LR14_EL2},
+ {"ich_lr15_el2", ICH_LR15_EL2}
};
uint32_t
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 5eebf44431..c9b6e23de3 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -354,13 +354,31 @@ namespace A64SysReg {
RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
ISR_EL1 = 0xc608, // 11 000 1100 0001 000
CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
- CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010
+ CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010
+
+ // GICv3 registers
+ ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000
+ ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000
+ ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010
+ ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010
+ ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011
+ ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001
+ ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011
+ ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101
};
enum SysRegWOValues {
DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
- PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100
+ PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100
+
+ // GICv3 registers
+ ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001
+ ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001
+ ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001
+ ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101
+ ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110
+ ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111
};
enum SysRegValues {
@@ -616,7 +634,57 @@ namespace A64SysReg {
PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
- PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110
+ PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110
+
+ // GICv3 registers
+ ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011
+ ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011
+ ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000
+ ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100
+ ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100
+ ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101
+ ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101
+ ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101
+ ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110
+ ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111
+ ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111
+ ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000
+ ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100
+ ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101
+ ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110
+ ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111
+ ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000
+ ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001
+ ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010
+ ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011
+ ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000
+ ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001
+ ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010
+ ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011
+ ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000
+ ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001
+ ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010
+ ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011
+ ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000
+ ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010
+ ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111
+ ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100
+ ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000
+ ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001
+ ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010
+ ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011
+ ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100
+ ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101
+ ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110
+ ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111
+ ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000
+ ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001
+ ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010
+ ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011
+ ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100
+ ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
+ ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
+ ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111
};
// Note that these do not inherit from NamedImmMapper. This class is
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 0000000000..f0d4dbe2bf
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// Several instruction patterns can be used to provide this capability. Some
+// of them perform better than other, potentially more direct, patterns,
+// specifically when one micro-op reads a D-register operand that has recently
+// been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass that looks for SPR producers which
+// are going to be used by DPR (or QPR) consumers and rewrites them into the
+// more optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+  // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if MO is a register operand of the given register class.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+  if (!MO) return ARM::ssub_0;
+  assert(MO->isReg() && "Non register operand found!");
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+      DEBUG(dbgs() << "Found partial copy " << *MI << "\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR (or QPR) register by duplicating the given lane with a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Copies the given subreg lane out of DReg into a new register of class TRC.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two DPR registers and combines them into a QPR using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
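+// Inserts ToInsert into the given lane of DReg with an INSERT_SUBREG and
+// returns the resulting register.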
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
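+// Creates an IMPLICIT_DEF of a single DPR register.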
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+  // merge two SPR values to form a DPR register. In order to avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+  //                    * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+  //                    * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+  DEBUG(dbgs() << "Running on function " << Fn.getName() << "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 5faf8c320c..80e5f37eb0 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46915eecf6..68380847a0 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -143,14 +143,12 @@ include "ARMSchedule.td"
// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 58c779830e..13ec208793 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
.addReg(ARM::PC)
- .addImm(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed001ea24a..126f160f6d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -3734,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // A9-like cores are particularly picky about mixing the two and want these
+ // CortexA9 is particularly picky about mixing the two and wants these
// converted.
- if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
@@ -4023,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
-
-
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- // Only Swift has partial register update problems.
- if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
return 0;
assert(TRI && "Need TRI instance");
@@ -4056,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
// Explicitly reads the dependency.
case ARM::VLD1LNd32:
- UseOp = 1;
+ UseOp = 3;
break;
default:
return 0;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index abdd251743..b6b27f849a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -680,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0ca6450e2b..7a02adf246 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@@ -1419,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 514971f01e..bb26090d2d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -870,8 +880,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- BenefitFromCodePlacementOpt = true;
-
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->isLikeA9();
@@ -3433,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
+/// and size(DestVec) > 128-bits.
+/// This is achieved by doing the one extension from the SrcVec, splitting the
+/// result, extending these parts, and then concatenating these into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
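Worked through for concreteness (an illustration, not from the commit): sign-extending v4i16 (64 bits) to v4i64 (256 bits) takes this path, since 4 * 64 == 256. Mid is the one ordinary extension to v4i32; EXTRACT_SUBVECTOR splits that into two v2i32 halves; each half is sign-extended to v2i64; and CONCAT_VECTORS reassembles the v4i64 result.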
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -5621,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9409f35974..c2f357e84f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1010,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1022,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1037,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1052,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1079,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1091,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1105,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1120,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1145,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
4, iir,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_imm:$shift))]>;
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_reg:$shift))]>;
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1174,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 98bd6c168e..e4e683c2a0 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -865,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
- if (isLd && MI->getOperand(0).getReg() == Base)
+ if (MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
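For clarity, the effect of dropping `isLd &&` above, restated as a standalone predicate (a sketch, not the optimizer's actual interface):

// Before: only loads were rejected when the transfer register equaled the
// would-be writeback base. After: the merge is refused in both directions.
static bool canMergeBaseUpdate(unsigned TransferReg, unsigned BaseReg) {
  return TransferReg != BaseReg;   // applies to loads and stores alike
}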
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 02196d06bf..7eb5ff665a 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -6,6 +6,70 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3
+// | | uopc Rd, Rn, T0 - P01 - 1
+// This tells us that the result in destination register Rd becomes available
+// a minimum of three cycles after the results in Rm and Rs are available, and
+// one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that two micro-ops are dispatched, that
+// the resource P01 is needed, and that the latency to Rn is different from
+// the latency to Rm and Rs. The scheduler can then decrease Rn's producer
+// latency by three, matching the ReadAdvance of 3 defined below.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : SchedRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline bypasses", i.e.
+// shorter latencies from certain registers, as needed in the example above.
+// Trailing "ReadDefault" entries can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of them).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
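To make the latency arithmetic in the comment above concrete, here is a minimal sketch in plain C++ with hypothetical stand-in types; the numbers 4 and 3 come from the WriteRes/ReadAdvance example in the comment:

#include <cstdio>

struct WriteResInfo { unsigned Latency; };   // stand-in for WriteRes
struct ReadAdvInfo  { unsigned Cycles;  };   // stand-in for ReadAdvance

// The consumer reads the operand ReadAdvance cycles after issue, so the
// visible producer->consumer latency shrinks by that amount (floored at 0).
static unsigned effectiveLatency(WriteResInfo W, ReadAdvInfo R) {
  return W.Latency > R.Cycles ? W.Latency - R.Cycles : 0;
}

int main() {
  WriteResInfo WriteALUsr = {4};                              // Latency = 4
  printf("Rm/Rs: %u\n", effectiveLatency(WriteALUsr, {0}));   // prints 4
  printf("Rn:    %u\n", effectiveLatency(WriteALUsr, {3}));   // prints 1
}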
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 4191931a5a..382e6cc4cd 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1898,6 +1898,8 @@ def CortexA9Model : SchedMachineModel {
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
+let SchedModel = CortexA9Model in {
+
def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
def A9UnitAGU : ProcResource<1>;
@@ -1918,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>;
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
// Basic ALU.
-def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+def : WriteRes<WriteALU, [A9UnitALU]>;
// ALU with operand shifted by immediate.
-def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
// ALU with operand shifted by register.
-def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
// Multiplication
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
@@ -2003,13 +2005,6 @@ foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles
-// Define TII for use in SchedVariant Predicates.
-def : PredicateProlog<[{
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
- (void)TII;
-}]>;
-
// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {
@@ -2254,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[
// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
-def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
// Some ALU operations can read loaded integer values one cycle early.
-def A9ReadA : SchedReadAdvance<1,
+def A9ReadALU : SchedReadAdvance<1,
[A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
@@ -2279,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-let SchedModel = CortexA9Model in {
def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteI,ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
-def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
-def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
-def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
-def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
-def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
-def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
-def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
-def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, ReadALU, ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
// A9WriteHi ignored for MUL32.
def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
@@ -2371,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
IIC_iStore_m,
IIC_iStore_mu]>;
def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
-def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
@@ -2486,4 +2480,11 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index e9bc3e0f39..28bb429feb 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1078,8 +1078,29 @@ def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
let Itineraries = SwiftItineraries;
}
-// TODO: Add Swift processor and scheduler resources.
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // 4.2.4 Arithmetic and Logical.
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC, EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ // Single cycle.
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : WriteRes<WriteALUsi, [SwiftUnitP01]>;
+ def : WriteRes<WriteALUsr, [SwiftUnitP01]>;
+ def : WriteRes<WriteALUSsr, [SwiftUnitP01]>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : ReadAdvance<ReadALUsr, 2>;
+}
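The Super fields above make SwiftUnitP0 and SwiftUnitP1 carve single units out of the two-wide SwiftUnitP01 pool. A toy availability model of that relationship (my paraphrase of the semantics, not the TableGen machinery):

#include <cassert>

struct Pool {
  unsigned Free;
  bool take() { return Free > 0 && Free--; }
};

int main() {
  Pool P01{2}, P0{1}, P1{1};
  // Issuing on a sub-resource also consumes super-resource bandwidth:
  assert(P0.take() && P01.take());   // e.g. a multiply on P0
  assert(P1.take() && P01.take());   // e.g. a branch on P1
  assert(!P01.take());               // no ALU slot left this cycle
}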
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e11314d4fc..739300e4ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
@@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() {
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ UnsafeFPMath = false;
}
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
@@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8ce22e1de2..5b5ee6aeb8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,6 +26,7 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
@@ -159,6 +160,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+ /// Target machine allowed unsafe FP math (such as use of NEON fp)
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -175,6 +179,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -189,7 +196,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 774521852a..42c7d2c437 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 01c04b48cf..1019b972e9 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
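A rough sketch of what the lookup above amounts to (simplified C++, not the real CostTableLookup signature): a linear scan over the table, with the hit scaled by LT.first, the type-legalization count — e.g. an fptrunc of v4f64 roughly legalizes as two v2f64 halves, so it pays the v2f64 entry twice.

#include <cstddef>

struct CostEntry { int ISD; int Ty; unsigned Cost; };  // mirrors CostTblEntry

static int tableLookup(const CostEntry *Tbl, size_t Len, int ISD, int Ty) {
  for (size_t I = 0; I != Len; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Ty == Ty)
      return static_cast<int>(I);
  return -1;   // no entry; fall through to the generic cost
}

// Final cost = legalization steps (LT.first) * per-legal-type table cost.
static unsigned castCost(unsigned LegalSteps, unsigned TblCost) {
  return LegalSteps * TblCost;
}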
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
@@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return NEONFloatConversionTbl[Idx].Cost;
}
-
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
@@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
-
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
@@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 6c678fdbd7..ed7b7ec9d2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
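The refactoring above gives each union member a named struct type while keeping a single discriminated union. Condensed to its essentials (an illustrative two-variant sketch, not the full operand set):

// Named variant structs sharing one union, discriminated by a Kind tag
// (ARMOperand uses its KindTy member the same way).
struct RegOp { unsigned RegNum; };
struct ImmOp { long Val; };

struct Operand {
  enum KindTy { k_Register, k_Immediate } Kind;   // discriminator
  union {
    RegOp Reg;
    ImmOp Imm;
  };
  static Operand reg(unsigned R) {
    Operand Op; Op.Kind = k_Register; Op.Reg.RegNum = R; return Op;
  }
  static Operand imm(long V) {
    Operand Op; Op.Kind = k_Immediate; Op.Imm.Val = V; return Op;
  }
};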
@@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label spells a register name, so "b r1" means branch
+ // to the label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
// Fall through for the Identifier case that is not a register or a
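A compact restatement of the decision added above (a sketch; the real parser also handles writeback and shifted registers before reaching this point):

#include <string>

// After a bare branch mnemonic, the identifier is always a label, even when
// it spells a register: "b r1" branches to a label named r1, while
// "mov r0, r1" still parses r1 as a register.
static bool expectsLabel(const std::string &Mnemonic) {
  return Mnemonic == "b" || Mnemonic == "bl";
}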
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 586834cf73..b832508a08 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 31a3b0b524..2e009e55e3 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -3049,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
return MCDisassembler::Success;
}
@@ -3278,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (load) {
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 463c440852..a64707e6f3 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -173,7 +173,6 @@ GCC is doing a couple of clever things here:
mov r1, #1
lsl r1, r1, #8
tst r2, r1
-
//===---------------------------------------------------------------------===//
@@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack
objects are referenced off the frame pointer with negative offsets. See
oggenc for an example.
-
//===---------------------------------------------------------------------===//
Poor codegen test/CodeGen/ARM/select.ll f7:
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 609d502aa5..7452fb776e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -588,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 604abf93cc..3e69098edc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -518,7 +518,6 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
attrs.removeAttribute(Attribute::StackAlignment);
}
- assert(!attrs.hasAttributes() && "Unhandled attribute!");
Out << "PAS = AttributeSet::get(mod->getContext(), ";
if (index == ~0U)
Out << "~0U,";
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 62aed1353c..178662447a 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -701,7 +701,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
// Otherwise we'd need a general integer division.
- if (!isPowerOf2_64(abs(IVBump)))
+ if (!isPowerOf2_64(abs64(IVBump)))
return 0;
MachineBasicBlock *PH = Loop->getLoopPreheader();
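The power-of-two restriction exists because the trip count can then be formed with a subtract and a shift. A sketch under the simplest assumptions (positive bump, Start below End, exact multiple; the pass itself handles the general bound adjustments):

#include <cassert>

static long tripCount(long Start, long End, long Bump) {
  assert(Bump > 0 && (Bump & (Bump - 1)) == 0 && "power-of-two bump only");
  int Shift = __builtin_ctzl(Bump);   // log2 of the bump
  return (End - Start) >> Shift;      // no integer-divide instruction needed
}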
@@ -1430,7 +1430,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
return 0;
typedef MachineBasicBlock::instr_iterator instr_iterator;
- typedef MachineBasicBlock::pred_iterator pred_iterator;
// Verify that all existing predecessors have analyzable branches
// (or no branches at all).
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 3a1c48bac9..8fc9ba1ee8 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -113,6 +113,46 @@ public:
SDNode *SelectAdd(SDNode *N);
bool isConstExtProfitable(SDNode *N) const;
+// XformMskToBitPosU5Imm - Returns the bit position which
+// the single bit 32 bit mask represents.
+// Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+}
+
+// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// Return true if there is exactly one bit set in V, i.e., if V is one of the
+// following integers: 2^0, 2^1, ..., 2^31.
+bool ImmIsSingleBit(uint32_t v) const {
+ uint32_t c = CountPopulation_64(v);
+ // Only return true if we counted 1 bit.
+ return c == 1;
+}
+
+// XformM5ToU5Imm - Return a target constant with the specified value, of type
+// i32 where the negative literal is transformed into a positive literal for
+// use in -= memops.
+inline SDValue XformM5ToU5Imm(signed Imm) {
+ assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant(-Imm, MVT::i32);
+}
+
+
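A plain C++ restatement of the mask-to-bit-position helpers above, runnable outside the DAG (the memop forms encode the bit position rather than the mask itself):

#include <cassert>
#include <cstdint>

static bool isSingleBit(uint32_t V) { return V && !(V & (V - 1)); }

static unsigned maskToBitPos(uint32_t Mask) {
  assert(isSingleBit(Mask) && "expects a one-bit mask");
  unsigned Pos = 0;
  while (!(Mask & 1)) { Mask >>= 1; ++Pos; }
  return Pos;
}

int main() {
  assert(isSingleBit(0x40) && maskToBitPos(0x40) == 6);   // bit 6
  assert(!isSingleBit(0x41));                             // two bits set
}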
// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
// [1..128], used in cmpb.gtu instructions.
inline SDValue XformU7ToU7M1Imm(signed Imm) {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 4c0f93c6cd..60b12ac01c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -537,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return(0);
}
+MachineInstr*
+HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE))
+ .addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
@@ -1881,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ assert(isPredicated(MI));
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
bool
HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
@@ -1949,6 +1965,10 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+ // We used to assert if the offset was not properly aligned; however,
+ // there are cases where a misaligned pointer recast can cause this
+ // problem, and we need to allow for it. The front end warns of such
+ // misalignments with respect to the load size.
switch(Opcode) {
@@ -1958,7 +1978,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
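For context, the misaligned-recast scenario the comment above refers to might look like this in source (a hypothetical illustration, and undefined behavior in portable C++, shown only to motivate the relaxed range checks):

// A misaligned recast: the frontend emits a 4-byte load at offset 2 and
// warns about it; the backend must accept the offset rather than assert.
int loadAt(char *Buf) {
  return *reinterpret_cast<int *>(Buf + 2);   // memw at a non-4-aligned offset
}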
@@ -1968,14 +1987,12 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STrid:
case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -1990,48 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
return (0 <= Offset && Offset <= 255);
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
return (0 <= Offset && Offset <= 127);
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
return (0 <= Offset && Offset <= 63);
// LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -2087,44 +2084,33 @@ isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- return true;
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
+ case Hexagon::MemOPb_SETBITi_V4:
+ case Hexagon::MemOPh_SETBITi_V4:
+ case Hexagon::MemOPw_SETBITi_V4:
+ case Hexagon::MemOPb_CLRBITi_V4:
+ case Hexagon::MemOPh_CLRBITi_V4:
+ case Hexagon::MemOPw_CLRBITi_V4:
+ return true;
}
+ return false;
}
@@ -2383,6 +2369,13 @@ isConditionalStore (const MachineInstr* MI) const {
}
}
+// Returns true if this is a dot-new instruction, i.e. either a new-value
+// (register) dot-new or a predicated dot-new instruction.
+bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
+ return (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
const uint64_t F = MI->getDesc().TSFlags;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index d2f059aa79..5df13a88b5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -127,6 +127,7 @@ public:
const BranchProbability &Probability) const;
virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool isPredicatedNew(const MachineInstr *MI) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
virtual bool
@@ -140,6 +141,11 @@ public:
isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
const BranchProbability &Probability) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
virtual DFAPacketizer*
CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
@@ -170,6 +176,7 @@ public:
bool isConditionalLoad (const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
bool isExtendable(const MachineInstr* MI) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index d7bab200f9..74dc0ca72a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -446,38 +446,58 @@ def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
s8ExtPred:$src2,
s8ImmPred:$src3)))]>;
-// Shift halfword.
-let isPredicable = 1 in
-def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = aslh($src1)",
- [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>;
+// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
+multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2)">,
+ Requires<[HasV4T]>;
+}
-let isPredicable = 1 in
-def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = asrh($src1)",
- [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>;
+multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+ }
+}
-// Sign extend.
-let isPredicable = 1 in
-def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxtb($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>;
+multiclass ALU32_2op_base<string mnemonic> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, neverHasSideEffects = 1 in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = "#mnemonic#"($src1)">;
-let isPredicable = 1 in
-def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxth($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>;
-
-// Zero extend.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxtb($src1)",
- []>;
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
+ neverHasSideEffects = 1 in {
+ defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>;
+ defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+ }
+ }
+}
+
+defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
+defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
+defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
+defm SXTH : ALU32_2op_base<"sxth">, PredNewRel;
+defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
+defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel;
+
+def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
+ (ASLH IntRegs:$src1)>;
+
+def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
+ (ASRH IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
+ (SXTB IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
+ (SXTH IntRegs:$src1)>;
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxth($src1)",
- []>;
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -488,29 +508,30 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// Conditional combine.
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1 in {
def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedFalse = 1 in
def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1 in
def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1, isPredicatedFalse = 1 in
def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst = combine($src2, $src3)",
[]>;
+}
// Compare.
defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
@@ -1009,20 +1030,6 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
(LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
}
-let neverHasSideEffects = 1 in
-def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memd(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memd(#$global)",
- []>,
- Requires<[NoV4T]>;
-
//===----------------------------------------------------------------------===//
// Post increment load
// Make sure that in post increment load, the first operand is always the post
@@ -1095,27 +1102,6 @@ let AddedComplexity = 20 in
def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
(i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
-let neverHasSideEffects = 1 in
-def LDrib_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memb(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDb_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memb(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memub(#$global)",
- []>,
- Requires<[NoV4T]>;
-
def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
(i32 (LDrih ADDRriS11_1:$addr))>;
@@ -1123,27 +1109,6 @@ let AddedComplexity = 20 in
def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
(i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
-let neverHasSideEffects = 1 in
-def LDrih_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memuh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
let AddedComplexity = 10 in
def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (LDriub ADDRriS11_0:$addr))>;
@@ -1152,21 +1117,6 @@ let AddedComplexity = 20 in
def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
(i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
-let neverHasSideEffects = 1 in
-def LDriub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memub(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-// Load unsigned halfword.
-let neverHasSideEffects = 1 in
-def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memuh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
// Load predicate.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
@@ -1175,21 +1125,6 @@ def LDriw_pred : LDInst2<(outs PredRegs:$dst),
"Error; should not emit",
[]>;
-// Indexed load.
-let neverHasSideEffects = 1 in
-def LDriw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memw(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def LDw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memw(#$global)",
- []>,
- Requires<[NoV4T]>;
-
// Deallocate stack frame.
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
def DEALLOCFRAME : LDInst2<(outs), (ins),
@@ -1423,28 +1358,8 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
// ST +
//===----------------------------------------------------------------------===//
///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-/// post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-/// last operand.
-///
// Store doubleword.
-let neverHasSideEffects = 1 in
-def STrid_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STd_GP : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
//===----------------------------------------------------------------------===//
// Post increment store
//===----------------------------------------------------------------------===//
@@ -1655,36 +1570,6 @@ def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
(i64 DoubleRegs:$src1))>;
}
-// memb(gp+#u16:0)=Rt
-let neverHasSideEffects = 1 in
-def STrib_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-// memb(#global)=Rt
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STb_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def STrih_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STh_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
// memh(Rx++#s4:1)=Rt.H
// Store word.
@@ -1695,20 +1580,6 @@ def STriw_pred : STInst2<(outs),
"Error; should not emit",
[]>;
-let neverHasSideEffects = 1 in
-def STriw_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1, validSubTargets = NoV4SubT in
-def STw_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
def ALLOCFRAME : STInst2<(outs),
@@ -2183,68 +2054,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
// Atomic load and store support
// 8 bit atomic load
-def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
(i32 (LDriub ADDRriS11_0:$src1))>;
def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
(i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-
// 16 bit atomic load
-def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
(i32 (LDriuh ADDRriS11_1:$src1))>;
def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
(i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-
-// 32 bit atomic load
-def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
(i32 (LDriw ADDRriS11_2:$src1))>;
def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
(i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
// 64 bit atomic load
-def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
(i64 (LDrid ADDRriS11_3:$src1))>;
@@ -2252,30 +2081,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
(i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-// 64 bit atomic store
-def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// 8 bit atomic store
-def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
(STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
@@ -2285,18 +2090,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
(i32 IntRegs:$src1))>;
-// 16 bit atomic store
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
(STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
@@ -2305,20 +2098,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
(STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
(i32 IntRegs:$src1))>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
(STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
@@ -2387,198 +2166,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
(i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
-// Map from store(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd.
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDrih_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDriuh_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a
-// certain global variable takes only two constant values, it shrinks the
-// global to a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
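A note on the pattern above: only bit 0 of a byte holding an i1 is defined, so the
zero-extending load must mask what it fetches. Illustratively (register numbers are
assumed, not taken from this patch), the selected code looks like:

    r0 = memb(r16+#0)    // LDrib: fetch the byte that holds the i1
    r0 = and(r0, #1)     // AND_rr/TFRI: keep only the defined bit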
@@ -2694,12 +2283,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
(STrib ADDRriS11_2:$addr, (TFRI 1))>;
-let AddedComplexity = 100 in
-// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
-// memw(#foo) = r0
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (TFRI 1))>,
- Requires<[NoV4T]>;
// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 1d0643d03b..cd0e475896 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -95,164 +95,6 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
//===----------------------------------------------------------------------===//
// ALU32 +
//===----------------------------------------------------------------------===//
-
-// Shift halfword.
-
-let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
-def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-}
-
-// Sign extend.
-
-let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
-def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-
-def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-}
-
-// Zero extend.
-
-let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in {
-def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-}
-
// Generate frame index addresses.
let neverHasSideEffects = 1, isReMaterializable = 1,
isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
@@ -596,329 +438,6 @@ def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
Requires<[HasV4T]>;
}
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memd(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memb(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memub(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memuh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memuh(##global)
-def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memuh(##global)
-def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
-def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memw(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2,
-validSubTargets = HasV4SubT in {
-def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memw(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memw(##global)
-def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memw(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memw(##global)
-def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memw(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memw(##global)
-def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memw(##$global)",
- []>,
- Requires<[HasV4T]>;
-}
-
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
- Requires<[HasV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
// zext i1->i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
(i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
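For reference, a sketch of what this zext i1->i64 selection produces (predicate and
register numbers are illustrative): the MUX materializes 0 or 1 in a 32-bit register,
and the COMBINE pairs it with a zero upper word:

    r0 = mux(p0, #1, #0)
    r1:0 = combine(#0, r0)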
@@ -1374,225 +893,6 @@ def STriw_shl_V4 : STInst<(outs),
// memw(Rx++I:circ(Mu))=Rt
// memw(Rx++Mu)=Rt
// memw(Rx++Mu:brev)=Rt
-// memw(gp+#u16:2)=Rt
-
-
-// memd(#global)=Rtt
-let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in
-def STd_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STd_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-def STd_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-def STd_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-def STd_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-}
-
-// memb(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STb_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STb_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-def STb_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-}
-
-// memh(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STh_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STh_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-}
-
-// memw(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-}
-
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-// to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===
// ST -
@@ -1772,15 +1072,6 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
// memb(Rx++I:circ(Mu))=Nt.new
// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
-
-// memb(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
// memh(Ru<<#u2+#U6)=Nt.new
let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
isNVStore = 1, validSubTargets = HasV4SubT in
@@ -1795,14 +1086,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs),
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memh(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Ru<<#u2+#U6)=Nt.new
let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
isNVStore = 1, validSubTargets = HasV4SubT in
@@ -1816,102 +1099,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs),
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
// memw(Rx++Mu:brev)=Nt.new
-// memw(gp+#u16:2)=Nt.new
-
-let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
-validSubTargets = HasV4SubT in
-def STw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
-isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
-def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-}
//===----------------------------------------------------------------------===//
// NV/ST -
@@ -2658,414 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
// MEMOP: Word, Half, Byte
//===----------------------------------------------------------------------===//
+def MEMOPIMM : SDNodeXForm<imm, [{
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int32_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+  // -1 .. -31 are represented as 65535..65505;
+  // assigning to a short restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int16_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+  // -1 .. -31 are represented as 255..225;
+  // assigning to a char restores the desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int8_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-31] we will set.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-31] we will clear.
+  // Bitwise-negate the mask first so the cleared bit becomes the set bit.
+ int32_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-15] we will set.
+ int16_t imm = N->getSExtValue();
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-15] we will clear.
+  // Bitwise-negate the mask first so the cleared bit becomes the set bit.
+ int16_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-7] we will set.
+ int8_t imm = N->getSExtValue();
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+  // Return, as an SDNode, the bit position [0-7] we will clear.
+  // Bitwise-negate the mask first so the cleared bit becomes the set bit.
+ int8_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
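The helpers these XForms call (XformM5ToU5Imm, XformMskToBitPosU5Imm, and friends)
are defined elsewhere in the Hexagon backend and are not part of this patch. Going
only by the comments above, a standalone C++ sketch of the arithmetic they perform
might look like the following; names and signatures here are illustrative, not the
real API:

    #include <cassert>
    #include <cstdint>

    // Negative m5 immediate in [-31, -1] -> its positive u5 counterpart,
    // so e.g. "x += -5" can be selected as a "-= #5" memop.
    uint32_t m5ToU5(int32_t imm) {
      assert(imm >= -31 && imm <= -1 && "expected an m5 immediate");
      return static_cast<uint32_t>(-imm);
    }

    // Single-bit mask -> bit position, as used by the SETMEMIMM forms.
    // The CLRMEMIMM forms complement the mask first, turning the one
    // cleared bit into the one set bit before this conversion.
    uint32_t mskToBitPos(uint32_t mask) {
      assert(mask != 0 && (mask & (mask - 1)) == 0 && "expected one set bit");
      uint32_t pos = 0;
      while ((mask & 1u) == 0) {
        mask >>= 1;
        ++pos;
      }
      return pos;
    }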
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with a register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4<(outs),
+ (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+ opc#"($base+#$offset)"#memOp#"$delta",
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1110;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+
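The offsetBits selection above encodes a scaled offset: memb keeps the raw byte
offset (u6:0), memh drops the low bit (u6:1), and memw drops two (u6:2). A minimal
C++ sketch of the same computation (function name is illustrative):

    #include <cstdint>

    // Mirror of the !if chain in MemOp_rr_base/MemOp_ri_base: the 6-bit
    // field holds the byte offset divided by the access size.
    uint32_t encodeOffsetBits(uint32_t byteOffset, unsigned opcBits) {
      switch (opcBits) {
      case 0b00: return byteOffset & 0x3F;        // memb: offset{5-0}
      case 0b01: return (byteOffset >> 1) & 0x3F; // memh: offset{6-1}
      case 0b10: return (byteOffset >> 2) & 0x3F; // memw: offset{7-2}
      default:   return 0;                        // the !if fallback
      }
    }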
//===----------------------------------------------------------------------===//
-// MEMOP: Word
-//
-// Implemented:
-// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Template class for MemOp instructions with an immediate value.
//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4 <(outs),
+ (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ opc#"($base+#$offset)"#memOp#"#$delta"
+ #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
- "memw($base+#$offset) += #$addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1111;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
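Note the !if on memOpBits{1} in the assembly string: the bit-manipulation forms
print with call-like syntax and need the closing parenthesis, while add/sub do not.
With illustrative operands, the four printed shapes are:

    memw(Rs+#u6:2)+=#U5
    memw(Rs+#u6:2)-=#U5
    memw(Rs+#u6:2)=clrbit(#U5)
    memw(Rs+#u6:2)=setbit(#U5)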
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
- "memw($base+#$offset) -= #$subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
- "memw($base+#$offset) += $addend",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
- "memw($base+#$offset) -= $subend",
- [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
- "memw($base+#$offset) &= $andend",
- [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
- "memw($base+#$offset) |= $orend",
- [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memw($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+// Multiclass to define MemOp instructions with a register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memw($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memw($addr) += $addend",
- [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memw($addr) -= $subend",
- [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memw($addr) &= $andend",
- [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memw($addr) |= $orend",
- [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+// Multiclass to define MemOp instructions with an immediate operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+ def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
+
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+ defm r : MemOp_rr <opc, opcBits, ImmOp>;
+ defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
+
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
+validSubTargets = HasV4SubT in {
+ let opExtentBits = 6, accessSize = ByteAccess in
+ defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+ let opExtentBits = 7, accessSize = HalfWordAccess in
+ defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+ let opExtentBits = 8, accessSize = WordAccess in
+ defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
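TableGen concatenates the outer defm prefixes with the suffixes from MemOp_rr and
MemOp_ri, so these three defm lines should expand to the whole memop family that
the patterns below reference, for example:

    MemOPb_ADDr_V4    : memb(Rs+#u6:0)+=Rt
    MemOPh_SUBi_V4    : memh(Rs+#u6:1)-=#U5
    MemOPw_ANDr_V4    : memw(Rs+#u6:2)&=Rt
    MemOPw_SETBITi_V4 : memw(Rs+#u6:2)=setbit(#U5)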
//===----------------------------------------------------------------------===//
-// MEMOP: Halfword
-//
-// Implemented:
-// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Multiclass to define 'Def Pats' for ALU operations on memory.
+// Here the value used for the ALU operation is an immediate value:
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 180 in
+ def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+              (MI IntRegs:$addr, #0, u5ImmPred:$addend)>;
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
- "memh($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ExtPred:$offset)),
+ (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+}
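As a concrete, hypothetical example of what these two patterns catch: a
read-modify-write of a word at a small immediate offset folds into a single memop.
Register allocation below is assumed, not taken from this patch:

    // C++ source, names illustrative:
    void bump(int *p) { p[3] += 5; }

    // Expected V4 selection:
    //   memw(r0+#12)+=#5
    // i.e. the AddedComplexity = 190 pattern with $offset = 12, $addend = 5.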
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
- "memh($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
- "memh($base+#$offset) += $addend",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
- "memh($base+#$offset) -= $subend",
- [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
- "memh($base+#$offset) += $andend",
- [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
- "memh($base+#$offset) |= $orend",
- [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memh($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon addMI, InstHexagon subMI> {
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+}
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memh($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memh($addr) += $addend",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memh($addr) -= $subend",
- [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memh($addr) &= $andend",
- [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memh($addr) |= $orend",
- [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
+ // Half Word
+ defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+}
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
+ MemOPw_SUBi_V4>;
+}
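All three extension flavors (zero, sign, any) can feed the same add/sub memops
because the result is stored straight back with a truncating store: whatever the
load does to the high bits never reaches memory. For instance,
(truncstorei8 (add (zextloadi8 addr), 5), addr) and its sextloadi8 twin both denote
the same byte update, memb(addr)+=#5, so one instruction covers every PatFrag
combination instantiated here.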
//===----------------------------------------------------------------------===//
-// MEMOP: Byte
-//
-// Implemented:
-// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
-// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
-//
-// Not implemented:
-// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
-// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// Multiclass to define 'Def Pats' for ALU operations on memory.
+// Here the value used for the ALU operation is a negative immediate:
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
- "memb($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ PatLeaf immPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI> {
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
- "memb($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
- "memb($base+#$offset) += $addend",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
- "memb($base+#$offset) -= $subend",
- [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
- "memb($base+#$offset) += $andend",
- [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
- "memb($base+#$offset) |= $orend",
- [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memb($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+ let AddedComplexity = 195 in
+ def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+}
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memb($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memb($addr) += $addend",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memb($addr) -= $subend",
- [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memb($addr) &= $andend",
- [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memb($addr) |= $orend",
- [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+}
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
+
+ // Word
+ defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+}
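// Illustrative sketch, not part of the patch: expanding the byte
// instantiation above through MemOpi_m5Pats gives, for the base-address
// (AddedComplexity = 190) case, a pattern equivalent to this hand-written
// def; every name below is taken from the surrounding code:
def : Pat<(truncstorei8 (add (zextloadi8 IntRegs:$addr), m5BImmPred:$subend),
                        IntRegs:$addr),
          (MemOPb_SUBi_V4 IntRegs:$addr, #0,
                          (MEMOPIMM_BYTE m5BImmPred:$subend))>;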
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+ PatLeaf extPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+
+ // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+ let AddedComplexity = 250 in
+ def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+ // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+ let AddedComplexity = 225 in
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+}
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Byte - clrbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+ // Byte - setbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+ // Half Word - clrbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+ // Half Word - setbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+ // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+
+ // memw(Rs+#0) = [clrbit|setbit](#U5)
+ // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
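// Illustrative sketch, not part of the patch: with the defs above, clearing
// bit 3 of a byte in memory, i.e. IR shaped like
//   store i8 (and (load i8* %p), 0xF7), i8* %p
// selects to "memb(Rs+#0) = clrbit(#3)" (MemOPb_CLRBITi_V4); Clr3ImmPred is
// assumed to match single-cleared-bit masks and CLRMEMIMM_BYTE to recover
// the bit index from the mask.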
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on memory
+// where the addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+ PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+ ComplexPattern addrPred, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI > {
+
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+ MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+ MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ // Byte
+ defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+ MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+ MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+ // Byte, Half Word
+ defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+ MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
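// Illustrative sketch, not part of the patch: a word-sized update such as
//   store (or (load %p), %v), %p
// now selects to "memw(Rs+#0) |= Rt" (MemOPw_ORr_V4) through the Rs+#0
// MemOpr_Pats pattern above, replacing the per-size MEM*_ORr_* defs
// deleted earlier in this diff.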
//===----------------------------------------------------------------------===//
// XTYPE/PRED +
@@ -3730,6 +2870,108 @@ def : Pat<(store (i64 DoubleRegs:$src1),
(STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
}
+//===----------------------------------------------------------------------===//
+// Multiclass for store instructions with GP-relative addressing mode.
+// mem[bhwd](#global)=Rt
+// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
+//===----------------------------------------------------------------------===//
+multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdress:$global, RC:$src),
+ mnemonic#"(#$global) = $src",
+ []>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$global, RC:$src),
+ mnemonic#"(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+}
+
+let validSubTargets = HasV4SubT in {
+defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>,
+ ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ;
+defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
+ ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ;
+defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
+ ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ;
+defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
+ ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ;
+}
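// Illustrative sketch, not part of the patch: each defm above yields a
// predicable GP-relative store (e.g. STw_GP_V4 for "memw(#global) = Rt"),
// predicated constant-extended absolute forms via ST_Abs_Pred, and
// new-value variants via ST_GP_nv; the exact generated names depend on
// ST_Abs_Pred and ST_Abs_Pred_nv, which are defined elsewhere.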
+
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
+
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
let PNewValue = !if(isPredNew, "new", "") in
@@ -3795,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+//===----------------------------------------------------------------------===//
+// Multiclass for load instructions with GP-relative addressing mode.
+// Rx=mem[bhwd](##global)
+// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdress:$global),
+ "$dst = "#mnemonic#"(#$global)",
+ []>;
+
+ let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>;
+defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
+defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
+defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>;
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
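// Illustrative note, not part of the patch: the AddedComplexity = 100 on the
// patterns above is what lets these GP-relative load mappings win over
// lower-complexity generic patterns for the same loads elsewhere in the file.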
+
// Transfer global address into a register
let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
deleted file mode 100644
index e16636ea48..0000000000
--- a/lib/Target/Hexagon/HexagonMCInst.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotation.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
-
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCInst.h"
-
-namespace llvm {
- class HexagonMCInst: public MCInst {
- // Packet start and end markers
- unsigned startPacket: 1, endPacket: 1;
- const MachineInstr *MachineI;
- public:
- explicit HexagonMCInst(): MCInst(),
- startPacket(0), endPacket(0) {}
-
- const MachineInstr* getMI() const { return MachineI; }
-
- void setMI(const MachineInstr *MI) { MachineI = MI; }
-
- bool isStartPacket() const { return (startPacket); }
- bool isEndPacket() const { return (endPacket); }
-
- void setStartPacket(bool yes) { startPacket = yes; }
- void setEndPacket(bool yes) { endPacket = yes; }
- };
-}
-
-#endif
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index cd3d2898d0..5e80e48b01 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -220,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
return false;
}
- unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+ unsigned cmpReg1, cmpOp2;
cmpReg1 = MI->getOperand(1).getReg();
if (secondReg) {
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index f947dfcdf9..d8b4e2fcb3 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -14,25 +14,26 @@
#include "HexagonRegisterInfo.h"
#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -215,28 +216,41 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if (TII.isMemOp(&MI)) {
- unsigned resReg = HEXAGON_RESERVED_REG_1;
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
- false, false, true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
- } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
- resReg).addReg(FrameReg).addReg(resReg);
- MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ // Use the constant extender if the instruction provides it
+ // and the subtarget is V4 or later.
+ if (Subtarget.hasV4TOps()) {
+ if (TII.isConstExtended(&MI)) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ TII.immediateExtend(&MI);
+ } else {
+ llvm_unreachable("Need to implement for memops");
+ }
} else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- resReg).addReg(FrameReg).addImm(Offset);
- MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
- true);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ // Only V3 and older instructions here.
+ unsigned ResReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+ false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ addReg(ResReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+ addImm(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ }
}
} else {
unsigned dstReg = MI.getOperand(0).getReg();
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 4bacb8fa67..07d5ce1d8a 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden,
static cl::opt<bool>
EnableMemOps(
"enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
- cl::desc("Generate V4 memop instructions."));
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc(
+ "Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+DisableMemOps(
+ "disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc(
+ "Do not generate V4 MEMOP in code generation for Hexagon target"));
static cl::opt<bool>
EnableIEEERndNear(
@@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if (EnableMemOps)
+ // UseMemOps is on by default unless explicitly disabled.
+ if (DisableMemOps)
+ UseMemOps = false;
+ else if (EnableMemOps)
UseMemOps = true;
else
UseMemOps = false;
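// Illustrative usage, not part of the patch: with memops now on by default,
// they can still be disabled from the command line, e.g.
//   llc -march=hexagon -disable-hexagon-memops test.ll
// (the flag names come from the cl::opts above; the llc invocation itself
// is an assumed example).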
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d9fef3e455..ce45c626f7 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -75,19 +79,20 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget),
InstrItins(&Subtarget.getInstrItineraryData()) {
- setMCUseCFI(false);
+ setMCUseCFI(false);
}
// addPassesForOptimizations - Allow the backend (target) to add Target
// Independent Optimization passes to the Pass Manager.
bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
-
- PM.add(createConstantPropagationPass());
- PM.add(createLoopSimplifyPass());
- PM.add(createDeadCodeEliminationPass());
- PM.add(createConstantPropagationPass());
- PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass());
+ }
return true;
}
@@ -121,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
- addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
- addPass(createHexagonPeephole());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPeephole());
+
return false;
}
bool HexagonPassConfig::addPreRegAlloc() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonHardwareLoops());
- }
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
return true;
}
bool HexagonPassConfig::addPreSched2() {
- addPass(&IfConverterID);
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
return true;
}
bool HexagonPassConfig::addPreEmitPass() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonFixupHwLoops());
- }
- addPass(createHexagonNewValueJump());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonNewValueJump());
// Expand Spill code for predicate registers.
addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
@@ -161,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
// Create Packets.
- addPass(createHexagonPacketizer());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPacketizer());
return false;
}
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 866beb1688..c0d86da1c0 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -149,7 +149,6 @@ namespace {
bool canReserveResourcesForConstExt(MachineInstr *MI);
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
- bool isDotNewInst(MachineInstr* MI);
};
}
@@ -2154,172 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI,
return false;
}
-bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (QII->isNewValueInst(MI))
- return true;
-
- switch (MI->getOpcode()) {
- case Hexagon::TFR_cdnNotPt:
- case Hexagon::TFR_cdnPt:
- case Hexagon::TFRI_cdnNotPt:
- case Hexagon::TFRI_cdnPt:
- case Hexagon::LDrid_cdnPt :
- case Hexagon::LDrid_cdnNotPt :
- case Hexagon::LDrid_indexed_cdnPt :
- case Hexagon::LDrid_indexed_cdnNotPt :
- case Hexagon::POST_LDrid_cdnPt_V4 :
- case Hexagon::POST_LDrid_cdnNotPt_V4 :
- case Hexagon::LDriw_cdnPt :
- case Hexagon::LDriw_cdnNotPt :
- case Hexagon::LDriw_indexed_cdnPt :
- case Hexagon::LDriw_indexed_cdnNotPt :
- case Hexagon::POST_LDriw_cdnPt_V4 :
- case Hexagon::POST_LDriw_cdnNotPt_V4 :
- case Hexagon::LDrih_cdnPt :
- case Hexagon::LDrih_cdnNotPt :
- case Hexagon::LDrih_indexed_cdnPt :
- case Hexagon::LDrih_indexed_cdnNotPt :
- case Hexagon::POST_LDrih_cdnPt_V4 :
- case Hexagon::POST_LDrih_cdnNotPt_V4 :
- case Hexagon::LDrib_cdnPt :
- case Hexagon::LDrib_cdnNotPt :
- case Hexagon::LDrib_indexed_cdnPt :
- case Hexagon::LDrib_indexed_cdnNotPt :
- case Hexagon::POST_LDrib_cdnPt_V4 :
- case Hexagon::POST_LDrib_cdnNotPt_V4 :
- case Hexagon::LDriuh_cdnPt :
- case Hexagon::LDriuh_cdnNotPt :
- case Hexagon::LDriuh_indexed_cdnPt :
- case Hexagon::LDriuh_indexed_cdnNotPt :
- case Hexagon::POST_LDriuh_cdnPt_V4 :
- case Hexagon::POST_LDriuh_cdnNotPt_V4 :
- case Hexagon::LDriub_cdnPt :
- case Hexagon::LDriub_cdnNotPt :
- case Hexagon::LDriub_indexed_cdnPt :
- case Hexagon::LDriub_indexed_cdnNotPt :
- case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::POST_LDriub_cdnNotPt_V4 :
-
- case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
-
-// Coditional add
- case Hexagon::ADD_ri_cdnPt:
- case Hexagon::ADD_ri_cdnNotPt:
- case Hexagon::ADD_rr_cdnPt:
- case Hexagon::ADD_rr_cdnNotPt:
-
- // Conditional logical operations
- case Hexagon::XOR_rr_cdnPt :
- case Hexagon::XOR_rr_cdnNotPt :
- case Hexagon::AND_rr_cdnPt :
- case Hexagon::AND_rr_cdnNotPt :
- case Hexagon::OR_rr_cdnPt :
- case Hexagon::OR_rr_cdnNotPt :
-
- // Conditonal subtract
- case Hexagon::SUB_rr_cdnPt :
- case Hexagon::SUB_rr_cdnNotPt :
-
- // Conditional combine
- case Hexagon::COMBINE_rr_cdnPt :
- case Hexagon::COMBINE_rr_cdnNotPt :
-
- // Conditional shift operations
- case Hexagon::ASLH_cdnPt_V4:
- case Hexagon::ASLH_cdnNotPt_V4:
- case Hexagon::ASRH_cdnPt_V4:
- case Hexagon::ASRH_cdnNotPt_V4:
- case Hexagon::SXTB_cdnPt_V4:
- case Hexagon::SXTB_cdnNotPt_V4:
- case Hexagon::SXTH_cdnPt_V4:
- case Hexagon::SXTH_cdnNotPt_V4:
- case Hexagon::ZXTB_cdnPt_V4:
- case Hexagon::ZXTB_cdnNotPt_V4:
- case Hexagon::ZXTH_cdnPt_V4:
- case Hexagon::ZXTH_cdnNotPt_V4:
-
- // Conditional stores
- case Hexagon::STrib_imm_cdnPt_V4 :
- case Hexagon::STrib_imm_cdnNotPt_V4 :
- case Hexagon::STrib_cdnPt_V4 :
- case Hexagon::STrib_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_cdnPt_V4 :
- case Hexagon::STrib_indexed_cdnNotPt_V4 :
- case Hexagon::POST_STbri_cdnPt_V4 :
- case Hexagon::POST_STbri_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
-
- // Store doubleword conditionally
- case Hexagon::STrid_indexed_cdnPt_V4 :
- case Hexagon::STrid_indexed_cdnNotPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STdri_cdnPt_V4 :
- case Hexagon::POST_STdri_cdnNotPt_V4 :
-
- // Store halfword conditionally
- case Hexagon::STrih_cdnPt_V4 :
- case Hexagon::STrih_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_cdnPt_V4 :
- case Hexagon::STrih_indexed_cdnNotPt_V4 :
- case Hexagon::STrih_imm_cdnPt_V4 :
- case Hexagon::STrih_imm_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_SThri_cdnPt_V4 :
- case Hexagon::POST_SThri_cdnNotPt_V4 :
-
- // Store word conditionally
- case Hexagon::STriw_cdnPt_V4 :
- case Hexagon::STriw_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_cdnPt_V4 :
- case Hexagon::STriw_indexed_cdnNotPt_V4 :
- case Hexagon::STriw_imm_cdnPt_V4 :
- case Hexagon::STriw_imm_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STwri_cdnPt_V4 :
- case Hexagon::POST_STwri_cdnNotPt_V4 :
-
- case Hexagon::LDd_GP_cdnPt_V4:
- case Hexagon::LDd_GP_cdnNotPt_V4:
- case Hexagon::LDb_GP_cdnPt_V4:
- case Hexagon::LDb_GP_cdnNotPt_V4:
- case Hexagon::LDub_GP_cdnPt_V4:
- case Hexagon::LDub_GP_cdnNotPt_V4:
- case Hexagon::LDh_GP_cdnPt_V4:
- case Hexagon::LDh_GP_cdnNotPt_V4:
- case Hexagon::LDuh_GP_cdnPt_V4:
- case Hexagon::LDuh_GP_cdnNotPt_V4:
- case Hexagon::LDw_GP_cdnPt_V4:
- case Hexagon::LDw_GP_cdnNotPt_V4:
-
- case Hexagon::STd_GP_cdnPt_V4:
- case Hexagon::STd_GP_cdnNotPt_V4:
- case Hexagon::STb_GP_cdnPt_V4:
- case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STh_GP_cdnPt_V4:
- case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STw_GP_cdnPt_V4:
- case Hexagon::STw_GP_cdnNotPt_V4:
- return true;
- }
- return false;
-}
-
static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
const HexagonInstrInfo *QII) {
assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
@@ -2490,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
// sense, i.e, either both should be negated or both should be none negated.
if (( predRegNumDst != predRegNumSrc) ||
- isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
return false;
}
@@ -2600,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC )
{
- // already a dot new instruction
- if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Already a dot new instruction.
+ if (QII->isDotNewInst(MI) && !IsNewifyStore(MI))
return false;
if (!isNewifiable(MI))
@@ -2616,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
else {
// Create a dot new machine instruction to see if resources can be
// allocated. If not, bail out now.
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
int NewOpcode = GetDotNewOp(MI->getOpcode());
const MCInstrDesc &desc = QII->get(NewOpcode);
DebugLoc dl;
@@ -2759,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
// !p0 is not complimentary to p0.new
return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
(GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
- (isDotNewInst(MI1) == isDotNewInst(MI2)));
+ (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
}
// initPacketizerState - Initialize packetizer flags
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 86f75d1c2d..3deb8d1deb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
AscizDirective = "\t.string\t";
WeakRefDirective = "\t.weak\t";
+ SupportsDebugInformation = true;
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index ad495ff306..dda6e247ac 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index d0fd7dcec1..bd83afc1cc 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 99a2fac95c..497f3866c9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index ae2e55617d..e504011dfd 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index a077dd7351..c673f59b5e 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -50,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ade6084752..c403f216b0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy
parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterClass);
+
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
@@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions);
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
@@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetAssignment();
+
bool parseDirectiveWord(unsigned Size, SMLoc L);
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -211,25 +222,30 @@ private:
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- RegisterKind Kind;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
@@ -385,6 +401,56 @@ public:
};
}
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ Inst.setLoc(IDLoc);
+ if (MCID.mayLoad() || MCID.mayStore()) {
+ // Check the offset of the memory operand; if it is a symbol
+ // reference or an immediate, we may have to expand the instruction.
+ for (unsigned i=0;i<MCID.getNumOperands();i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ if (MemOffset < -32768 || MemOffset > 32767) {
+ // Offset can't exceed a signed 16-bit value.
+ expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true);
+ return false;
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() == MCExpr::SymbolRef){
+ const MCSymbolRefExpr *SR =
+ static_cast<const MCSymbolRefExpr*>(Expr);
+ if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+ // Expand symbol
+ expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false);
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (needsExpansion(Inst))
+ expandInstruction(Inst, IDLoc, Instructions);
+ else
+ Instructions.push_back(Inst);
+
+ return false;
+}
+
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch(Inst.getOpcode()) {
@@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
}
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd) {
+ const MCSymbolRefExpr *SR;
+ MCInst TempInst;
+ unsigned ImmOffset,HiOffset,LoOffset;
+ const MCExpr *ExprOffset;
+ unsigned TmpRegNum;
+ unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
+ Mips::CPURegsRegClassID,
+ getATReg());
+ // 1st operand is either source or dst register
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ unsigned RegOpNum = Inst.getOperand(0).getReg();
+ // 2nd operand is base register
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ unsigned BaseRegNum = Inst.getOperand(1).getReg();
+ // 3rd operand is either immediate or expression
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm();
+ LoOffset = ImmOffset & 0x0000ffff;
+ HiOffset = (ImmOffset & 0xffff0000) >> 16;
+ // If the MSB of LoOffset is 1 (negative number), we must increment HiOffset.
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions will have the same location
+ TempInst.setLoc(IDLoc);
+ // The first instruction in the expansion is LUi. For a load we can use
+ // the dst register as a temporary if base and dst are different,
+ // but for stores we must use $at.
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+ // Add the instruction to the list
+ Instructions.push_back(TempInst);
+ // and prepare TempInst for the next instruction,
+ TempInst.clear();
+ // which adds the temp register to the base.
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ // and finally, create the original instruction with the low part
+ // of the offset and the new base.
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
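// Illustrative sketch, not part of the patch: for an offset that does not
// fit in a signed 16-bit field, the expansion above turns
//   lw $2, 65536($3)
// into roughly
//   lui  $2, 1           # HiOffset (dst reused as temp: load, base != dst)
//   addu $2, $2, $3      # temp += base
//   lw   $2, 0($2)       # original opcode, LoOffset, new base
// The concrete registers and offset here are assumed example values.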
+
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success: {
- if (needsExpansion(Inst)) {
- SmallVector<MCInst, 4> Instructions;
- expandInstruction(Inst, IDLoc, Instructions);
- for(unsigned i =0; i < Instructions.size(); i++){
- Out.EmitInstruction(Instructions[i]);
- }
- } else {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
- }
+ if (processInstruction(Inst,IDLoc,Instructions))
+ return true;
+ for(unsigned i =0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
return false;
}
case Match_MissingFeature:
@@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Identifier:
+ // Look for an existing symbol; we should check whether
+ // we need to assign the proper RegisterKind.
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_None))
+ return false;
+ // else fall through to expression parsing
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
// Check the type of the expression
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // it's a constant, evaluate lo or hi value
- int Val = MCE->getValue();
+ // It's a constant, evaluate lo or hi value
if (Str == "lo") {
- Val = Val & 0xffff;
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
} else if (Str == "hi") {
+ int Val = MCE->getValue();
int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
- //lower part is treated as signed int, so if it is negative
- //we must add 1 to hi part to compensate
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate
if (LoSign)
Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
}
- Res = MCConstantExpr::Create(Val, getContext());
return false;
}
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // it's a symbol, create symbolic expression from symbol
+ // It's a symbol, create symbolic expression from symbol
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
@@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
switch(getLexer().getKind()) {
default:
return true;
+ case AsmToken::Identifier:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!isMips64())
return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
}
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind) {
+
+ MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ if (Sym) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *Expr;
+ if (Sym->isVariable())
+ Expr = Sym->getVariableValue();
+ else
+ return false;
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const StringRef DefSymbol = Ref->getSymbol().getName();
+ if (DefSymbol.startswith("$")) {
+ // Look up the register with the corresponding name.
+ int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64());
+ if (RegNum > -1) {
+ Parser.Lex();
+ MipsOperand *op = MipsOperand::CreateReg(RegNum,S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ } else if (Expr->getKind() == MCExpr::Constant) {
+ Parser.Lex();
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+ MipsOperand *op = MipsOperand::CreateImm(Const,S,
+ Parser.getTok().getLoc());
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ return false;
+}
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // line should look like:
+ // Line should look like:
// .set noat
// set at reg to 0
Options.setATReg(0);
// eat noat
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); //eat '='
+ getParser().Lex(); // eat '='
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // eat '$'
+ Parser.Lex(); // Eat '$'
const AsmToken &Reg = Parser.getTok();
if (Reg.is(AsmToken::Identifier)) {
AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); //eat reg
+ getParser().Lex(); // Eat reg
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
@@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
}
+
+bool MipsAsmParser::parseSetAssignment() {
+ StringRef Name;
+ const MCExpr *Value;
+
+ if (Parser.parseIdentifier(Name))
+ reportParseError("expected identifier after .set");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return reportParseError("unexpected token in .set directive");
+ Lex(); // Eat the comma.
+
+ if (Parser.parseExpression(Value))
+ reportParseError("expected valid expression after comma");
+
+ // Check if the Name already exists as a symbol.
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ return reportParseError("symbol already defined");
+ }
+ Sym = getContext().GetOrCreateSymbol(Name);
+ Sym->setVariableValue(Value);
+
+ return false;
+}
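// Illustrative usage, not part of the patch: parseSetAssignment accepts
// assembler input such as
//   .set fp, $30
// after which searchSymbolAlias resolves "fp" in operands like
//   move $4, fp
// The particular register and mnemonic above are assumed examples.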
bool MipsAsmParser::parseDirectiveSet() {
// get next token
@@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() {
// ignore this directive for now
Parser.eatToEndOfStatement();
return false;
+ } else {
+ // It is just an identifier; look for an assignment.
+ parseSetAssignment();
+ return false;
}
return true;
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 58aa1be34d..cf8bb189e4 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
@@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 025a783f93..59e49d8ddc 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -138,10 +138,10 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
@@ -484,14 +484,14 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
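+  // The DSP ASE provides four accumulator registers (ac0-ac3).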
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 96f93a0789..e198a7c983 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
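+ // (MIPS branch offsets count instruction words, so the low two bits are
+ // implicit.)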
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
@@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
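+ // (Jump targets are likewise encoded in units of instruction words.)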
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getJumpTargetOpValue expects only expressions or an immediate");
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 25f4ffb929..54fdb78714 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,7 +20,7 @@ namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, 8) {}
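+ // (The second constructor argument appears to set the Mips16 stack
+ // alignment in bytes.)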
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 0000000000..00b3449300
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
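+/// The multiply node itself only produces glue; the low/high halves are read
+/// back through mflo/mfhi, threaded on that glue value.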
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
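+ // This materializes %hi(_gp_disp) shifted left by 16 plus the pc-relative
+ // %lo(_gp_disp), using only Mips16-accessible instructions.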
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Output the instructions required to put the
+/// SP into a Mips16 accessible aliased register.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
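+// Mips16 only has SP-relative addressing for word-sized loads and stores, so
+// for byte and halfword accesses the stack pointer has to be available in an
+// ordinary register: either the frame pointer or the SP alias register.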
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+}
+
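+// Match an address as Base + Offset; the third result (Alias) supplies the
+// SP alias register when the parent load/store needs to go through it.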
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // If Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // On PIC code, load the global address (GA).
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+ // If the first operand is a FI, get the TargetFI node.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized. Used for
+/// expanded, promoted and normal instructions.
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ // Instruction selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
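+ // Mips16 has no usable carry flag here: recompute the carry/borrow with an
+ // sltu on the low-half result, fold it into the right operand, then emit a
+ // plain addu/subu.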
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 0000000000..baa85877d9
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 0000000000..23eb5375ac
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.cpp - Mips16 DAG Lowering Interface -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("Enable mips16 hard float."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Don't expand conditional move related "
+ "pseudos for Mips16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // Set up as if mips32, then revert so we can test the mechanism
+ // for switching.
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or returned values placed in floating point
+// registers, we use a set of helper functions. (This includes functions whose
+// return type is complex, which on Mips is returned in a pair of floating
+// point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In the traditional Mips O32 and N32 ABIs, floating point numbers are passed
+// in floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values, and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df);
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10.
+// The suffix can be calculated as follows:
+// for a given argument Arg:
+//   Arg1x, Arg2x = 1 : Arg is sf
+//                  2 : Arg is df
+//                  0 : Arg is neither sf nor df
+// So this stub is the string for the number Arg1x + Arg2x*4.
+// However, not all numbers between 0 and 10 are possible; we check anyway and
+// assert if an impossible value occurs.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >= 2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type.
+// return type: float          sf_
+//              double         df_
+//              single complex sc_
+//              double complex dc_
+//              others         NO PREFIX
+//
+// The full name of a helper function is __mips16_call_stub +
+//    return type dependent prefix + stub number.
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently, but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There is a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. In each there are 11
+// possible entries. Entries with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for the one for return types which are
+// neither sf, df, sc, nor dc, in which we only care about the cases that have
+// sf or df as the first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0", T1
+#define P P_
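+// Note: T and T1 refer to P before P is defined; that is fine, since macro
+// bodies are only expanded at the point of use, after each #define P below.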
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char *Mips16TargetLowering::getMips16HelperFunction
+ (Type *RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // Check if it's a complex type.
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // Currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier, so we will assume that we don't know what kind it is
+ // and generate the helper.
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper)
+ Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
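+ // Pass the real callee in $v0 and redirect the call to the helper stub,
+ // which is itself reached through the GOT.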
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock
+ *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
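+ // Use the short 8-bit immediate form when the immediate fits; otherwise use
+ // the EXTENDed 16-bit form.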
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 0000000000..b23e2a1f37
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELLOWERING_H
+#define MIPS16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MIPS16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index fd3cc8f190..17dd2c0796 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -98,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void Mips16InstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -110,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::SwRxSpImmX16;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void Mips16InstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -126,7 +125,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
Opc = Mips::LwRxSpImmX16;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 1cb1dfe196..a77a9043bb 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -48,17 +48,19 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index a9e9c52716..aa51aaf465 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
@@ -1466,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
// MipsDivRem
//
def: Mips16Pat
- <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
//
// MipsDivRemU
//
def: Mips16Pat
- <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
// signed a,b
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 0ea9368949..6cca227685 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -72,6 +72,12 @@ bool Mips16RegisterInfo::saveScavengerRegister
return true;
}
+const TargetRegisterClass *
+Mips16RegisterInfo::intRegClass(unsigned Size) const {
+ assert(Size == 4);
+ return &Mips::CPU16RegsRegClass;
+}
+
void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index b8f818a478..2b3d2b1a4e 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -37,6 +37,8 @@ public:
const TargetRegisterClass *RC,
unsigned Reg) const;
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 5903b9e623..846a8224af 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -66,6 +66,14 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
}
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>;
+ defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
+}
+
+def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
+
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -179,10 +187,16 @@ def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
MULT_FM<0, 0x1c>;
def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
MULT_FM<0, 0x1d>;
-def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
- MULT_FM<0, 0x1e>;
-def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
- MULT_FM<0, 0x1f>;
+def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult,
+ IIImul>;
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu,
+ IIImul>;
+def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem,
+ IIIdiv, 0>;
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU,
+ IIIdiv, 0>;
def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
@@ -306,6 +320,10 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// mflo/hi patterns.
+def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -332,13 +350,19 @@ def : InstAlias<"not $rt, $rs",
def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
Requires<[HasMips64]>;
+def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
def : InstAlias<"daddu $rs, $rt, $imm",
(DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
1>;
def : InstAlias<"dadd $rs, $rt, $imm",
(DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
1>;
-
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>, Requires<[HasMips64]>;
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64" in {
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 9531b91487..3c116e1264 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -20,17 +20,18 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
// Mips-specific dsp nodes
-def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
-def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, untyped>]>;
+def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
class MipsDSPBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof>;
class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>;
def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
@@ -40,7 +41,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
-def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>;
def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
@@ -383,7 +384,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
@@ -392,46 +393,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
}
-class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
- PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R1_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins simm16:$shift);
+class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
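+  // The accumulator is read-modify-write, hence the tied-operand constraint.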
+ string Constraints = "$acin = $ac";
}
-class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R2_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class MTHLIP_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -454,35 +449,37 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<Register> Defs = [DSPCtrl];
}
-class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
+class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class DPA_W_PH_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
+class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
-}
-
-class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
+ list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))];
InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ bit isCommutable = 1;
}
-class MULT_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ string Constraints = "$acin = $ac";
}
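// Note the split above: MULT_DESC_BASE takes no $acin input because mult and
// multu fully overwrite the accumulator (hence an untied def and
// isCommutable = 1), while MADD_DESC_BASE threads the previous accumulator
// value through the tied $acin operand. Roughly:
//
//   mult : $ac = $rs * $rt             (pure def, commutable)
//   madd : $ac = $acin + ($rs * $rt)   (read-modify-write, $acin = $ac)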
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
@@ -717,44 +714,40 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
NoItinerary, DSPRegs, DSPRegs>,
IsCommutable;
-class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
+ MipsMULSAQ_S_W_PH>;
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
// Dot product with accumulate/subtract
-class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
-
-class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
-
-class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
-
-class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>;
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
-class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
-class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
-class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
-class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
-
-class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
-
-class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
+class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>;
+class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>;
+class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>;
+class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
@@ -867,11 +860,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
NoItinerary>;
-class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
-class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
@@ -975,23 +968,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
IsCommutable;
// Dot product with accumulate/subtract
-class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
-class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
-class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
+ MipsDPAQX_SA_W_PH>;
-class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
-class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
-class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
+ MipsDPSQX_SA_W_PH>;
-class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
// Precision reduce/expand
class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
@@ -1206,71 +1201,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
- MULSAQ_S_W_PH>;
-def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
- MAQ_S_W_PHL>;
-def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
- MAQ_S_W_PHR>;
-def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
- MAQ_SA_W_PHL>;
-def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
- MAQ_SA_W_PHR>;
-def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
- DPAU_H_QBL>;
-def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
- DPAU_H_QBR>;
-def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
- DPSU_H_QBL>;
-def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
- DPSU_H_QBR>;
-def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
- DPAQ_S_W_PH>;
-def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
- DPSQ_S_W_PH>;
-def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
- DPAQ_SA_L_W>;
-def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
- DPSQ_SA_L_W>;
-
-def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
- IsCommutable;
-def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
- IsCommutable;
-def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
- IsCommutable, UseAC;
-def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
- IsCommutable, UseAC;
-def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
- UseAC;
-def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
- UseAC;
-
-def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
-def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
-def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
-
-let Predicates = [HasDSPR2] in {
-
-def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
-def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
-def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
- DPAQX_S_W_PH>;
-def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
- DPAQX_SA_W_PH>;
-def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
- DPAX_W_PH>;
-def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
- DPSX_W_PH>;
-def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
- DPSQX_S_W_PH>;
-def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
- DPSQX_SA_W_PH>;
-def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
- MULSA_W_PH>;
-
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>;
+ defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
}
+def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
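+// With accumulators allocatable, 64-bit accumulator values can now be
+// spilled, reloaded and copied; these pseudos stand in for those operations
+// until they are expanded after register allocation. A sketch of an assumed
+// expansion of an accumulator spill on MIPS32 (not part of this patch):
+//
+//   mflo $t0, $ac1
+//   mfhi $t1, $ac1
+//   sw   $t0, 0($sp)
+//   sw   $t1, 4($sp)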
+
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
Pat<pattern, result>, Requires<[pred]>;
@@ -1296,10 +1234,12 @@ def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+ DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, CPURegs:$rs)>;
class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1313,3 +1253,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
+
+// mflo/hi patterns.
+let AddedComplexity = 20 in
+def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
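+
+// Reading the lo/hi half of an accumulator needs no instruction of its own:
+// an ExtractLOHI with a constant index folds into EXTRACT_SUBREG, and the
+// resulting subregister copy is what later becomes mflo/mfhi. A sketch
+// (treating index 0 as the lo half and 1 as the hi half, by assumption):
+//
+//   (i32 (ExtractLOHI ACRegsDSP:$ac, 0))  ~  mflo
+//   (i32 (ExtractLOHI ACRegsDSP:$ac, 1))  ~  mfhi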
+
+// Indexed load patterns.
+class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
+ DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
+ (Instr i32:$base, i32:$index)>;
+
+let AddedComplexity = 20 in {
+ def : IndexedLoadPat<zextloadi8, LBUX>;
+ def : IndexedLoadPat<sextloadi16, LHX>;
+ def : IndexedLoadPat<load, LWX>;
+}
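+// These patterns fold an add of base and index directly into the DSP-ASE
+// indexed loads; AddedComplexity = 20 makes them win over the plain
+// base+offset addressing patterns. The intended selections, sketched:
+//
+//   (i32 (zextloadi8  (add i32:$b, i32:$i)))  ->  (LBUX $b, $i)
+//   (i32 (sextloadi16 (add i32:$b, i32:$i)))  ->  (LHX  $b, $i)
+//   (i32 (load        (add i32:$b, i32:$i)))  ->  (LWX  $b, $i)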
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e265590141..d07a595af3 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -220,9 +220,9 @@ namespace {
/// that can be moved to the delay slot. Returns true on success.
bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
- /// This function searches MBB's successor blocks for an instruction that
- /// can be moved to the delay slot and inserts clones of the instruction
- /// into the successor blocks.
+ /// This function searches one of MBB's successor blocks for an instruction
+ /// that can be moved to the delay slot and inserts clones of the
+ /// instruction into the successor's predecessor blocks.
bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
/// Pick a successor block of MBB. Return NULL if MBB doesn't have a
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 14268d2130..6a5f79d0df 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -26,9 +26,8 @@ protected:
const MipsSubtarget &STI;
public:
- explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
- sti.hasMips64() ? 16 : 8), STI(sti) {}
+ explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
+ : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
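// The stack alignment is now supplied by the creating subclass instead of
// being recomputed from hasMips64() here. A sketch of how a subclass might
// forward it (class name and values assumed, not shown in this patch):
//
//   explicit MipsSEFrameLowering(const MipsSubtarget &STI)
//     : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}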
static const MipsFrameLowering *create(MipsTargetMachine &TM,
const MipsSubtarget &ST);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 6dff548505..77b08cb11e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -45,270 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- /// (reg + imm).
- bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Fall back on this function if all else fails.
- bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Match integer address pattern.
- bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
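// Only the post-ISel pass over the function is virtual now; each subtarget
// selector implements the pure virtual processFunctionAfterISel declared in
// the new MipsISelDAGToDAG.h. The shape of an override, sketched (class and
// helper names assumed):
//
//   void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
//     initGlobalBaseReg(MF);  // logic that previously lived in this file
//   }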
@@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
/// ComplexPattern used in MipsInstrInfo.
/// Used on Mips load/store instructions.
bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- }
-
+ llvm_unreachable("Unimplemented function.");
return false;
}
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- return selectAddrRegImm(Addr, Base, Offset) ||
- selectAddrDefault(Addr, Base, Offset);
-}
-
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
-}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions that are not customized. Used for
/// expanded, promoted and normal instructions.
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
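// The factory is the one place that still knows both selector variants;
// createMips16ISelDag and createMipsSEISelDag come from the headers included
// at the top of this file. A typical call site is unchanged, e.g. (sketch):
//
//   addPass(createMipsISelDag(getMipsTargetMachine()));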
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 0000000000..cf0f9c58aa
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
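+
+// selectNode is the extension point of this class: Select gives each
+// subclass first crack at a node and falls back to the generic tablegen
+// path when the pair's first member is false. A sketch of an override
+// (concrete class and helper names are assumed, not from this header):
+//
+//   std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+//     switch (Node->getOpcode()) {
+//     case MipsISD::Mult: return std::make_pair(true, selectMult(Node));
+//     default:            return std::make_pair(false, (SDNode*)0);
+//     }
+//   }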
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c452dee8d0..e2219f257e 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-lower"
-#include <set>
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -42,26 +41,9 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
-static cl::opt<bool>
-Mips16HardFloat("mips16-hard-float", cl::NotHidden,
- cl::desc("MIPS: mips16 hard float enable."),
- cl::init(false));
-
-static cl::opt<bool> DontExpandCondPseudos16(
- "mips16-dont-expand-cond-pseudo",
- cl::init(false),
- cl::desc("Dont expand conditional move related "
- "pseudos for Mips 16"),
- cl::Hidden);
-
-
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
-static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
@@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
@@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
@@ -173,12 +158,18 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
+ case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::Mult: return "MipsISD::Mult";
+ case MipsISD::Multu: return "MipsISD::Multu";
case MipsISD::MAdd: return "MipsISD::MAdd";
case MipsISD::MAddu: return "MipsISD::MAddu";
case MipsISD::MSub: return "MipsISD::MSub";
case MipsISD::MSubu: return "MipsISD::MSubu";
case MipsISD::DivRem: return "MipsISD::DivRem";
case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::DivRem16: return "MipsISD::DivRem16";
+ case MipsISD::DivRemU16: return "MipsISD::DivRemU16";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::Wrapper: return "MipsISD::Wrapper";
@@ -211,110 +202,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-namespace {
- struct LTStr {
- bool operator()(const char *S1, const char *S2) const
- {
- return strcmp(S1, S2) < 0;
- }
- };
-
- std::set<const char*, LTStr> NoHelperNeeded;
-}
-
-void MipsTargetLowering::setMips16LibcallName
- (RTLIB::Libcall L, const char *Name) {
- setLibcallName(L, Name);
- NoHelperNeeded.insert(Name);
-}
-
-void MipsTargetLowering::setMips16HardFloatLibCalls() {
- setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
- setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
- setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
- setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
- setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
- setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
- setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
- setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
- setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
- setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
- setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
- setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
- setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
- setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
- setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
- setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
- setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
- setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
- setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
- setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
- setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
- setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
- setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
- setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
- setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
- setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
-}
-
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
// Mips does not have i1 type, so use i32 for
// setcc operation results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (Mips16HardFloat)
- setMips16HardFloatLibCalls();
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -348,18 +246,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -472,21 +358,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -511,8 +382,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTruncStoreAction(MVT::i64, MVT::i32, Custom);
}
- setTargetDAGCombine(ISD::ADDE);
- setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
setTargetDAGCombine(ISD::SELECT);
@@ -523,7 +392,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
@@ -531,22 +399,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
MaxStoresPerMemcpy = 16;
}
-bool
-MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- if (Fast)
- *Fast = true;
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
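// Lowering now mirrors instruction selection: a single factory chooses the
// Mips16 or SE implementation once, when the target machine is built. An
// assumed call site, for illustration:
//
//   TLInfo = MipsTargetLowering::create(*this);  // in MipsTargetMachine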
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -555,178 +412,6 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
-// selectMADD -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
- // ADDENode's second operand must be a flag output of an ADDC node in order
- // for the matching to be successful.
- SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
- if (ADDCNode->getOpcode() != ISD::ADDC)
- return false;
-
- SDValue MultHi = ADDENode->getOperand(0);
- SDValue MultLo = ADDCNode->getOperand(0);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo and MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MADD only if ADDENode and ADDCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than ADDENode or ADDCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MADD instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc DL = ADDENode->getDebugLoc();
-
- // create MipsMAdd(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
- SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ADDCNode->getOperand(1),// Lo0
- ADDENode->getOperand(1));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
- MAdd);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of adde and addc here
- if (!SDValue(ADDCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
-
- if (!SDValue(ADDENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
-
- return true;
-}
-
-// selectMSUB -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
- // SUBENode's second operand must be a flag output of an SUBC node in order
- // for the matching to be successful.
- SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
- if (SUBCNode->getOpcode() != ISD::SUBC)
- return false;
-
- SDValue MultHi = SUBENode->getOperand(1);
- SDValue MultLo = SUBCNode->getOperand(1);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo and MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than SUBENode or SUBCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MSUB instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc DL = SUBENode->getDebugLoc();
-
- // create MipsSub(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
- SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- SUBCNode->getOperand(0),// Lo0
- SUBENode->getOperand(0));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32,
- MSub);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of sube and subc here
- if (!SDValue(SUBCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
-
- if (!SDValue(SUBENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
-
- return true;
-}
-
-static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMADD(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
-static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMSUB(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
@@ -736,8 +421,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
EVT Ty = N->getValueType(0);
unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
- unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
- MipsISD::DivRemU;
+ unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
+ MipsISD::DivRemU16;
DebugLoc DL = N->getDebugLoc();
SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
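The rewrite here leans on the MIPS convention that one divide instruction fills both halves of the accumulator: quotient in LO, remainder in HI. A small illustrative model (names ours):

    #include <cstdint>

    struct HiLo { uint32_t Hi, Lo; };

    // One hardware divide produces both results; the combine then emits a
    // CopyFromReg only for whichever of the two the DAG actually uses.
    static HiLo divRem(uint32_t Num, uint32_t Den) {
      return { Num % Den /* HI: remainder */, Num / Den /* LO: quotient */ };
    }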
@@ -791,8 +476,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
}
-// Returns true if condition code has to be inverted.
-static bool invertFPCondCode(Mips::CondCode CC) {
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
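For orientation, invertFPCondCodeUser relies on the condition-code space splitting into two equal banks; a schematic sketch in which the concrete enum values are our assumption and only the two-bank layout matters:

    // The first bank (FCOND_F..FCOND_NGT) encodes conditions the FPU tests
    // directly; the second bank holds their logical negations, so branches
    // and conditional moves that use them must switch to the inverted form.
    enum SketchCondCode { FCOND_F = 0, FCOND_NGT = 15, FCOND_T = 16, FCOND_GT = 31 };

    static bool needsInvertedUser(SketchCondCode CC) {
      return CC >= FCOND_T; // second bank: use CMovFP_F / BRANCH_F
    }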
@@ -828,9 +514,8 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// Creates and returns a CMovFPT/F node.
static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
- bool invert = invertFPCondCode((Mips::CondCode)
- cast<ConstantSDNode>(Cond.getOperand(2))
- ->getSExtValue());
+ ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+ bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue());
return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
True.getValueType(), True, False, Cond);
@@ -997,10 +682,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
switch (Opc) {
default: break;
- case ISD::ADDE:
- return performADDECombine(N, DAG, DCI, Subtarget);
- case ISD::SUBE:
- return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::SDIVREM:
case ISD::UDIVREM:
return performDivRemCombine(N, DAG, DCI, Subtarget);
@@ -1042,32 +723,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::BR_JT: return lowerBR_JT(Op, DAG);
- case ISD::BRCOND: return lowerBRCOND(Op, DAG);
- case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
- case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
- case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
- case ISD::JumpTable: return lowerJumpTable(Op, DAG);
- case ISD::SELECT: return lowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return lowerSETCC(Op, DAG);
- case ISD::VASTART: return lowerVASTART(Op, DAG);
- case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
- case ISD::FABS: return lowerFABS(Op, DAG);
- case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
- case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
- case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
- case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
- case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
- case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
- case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
- case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
- case ISD::LOAD: return lowerLOAD(Op, DAG);
- case ISD::STORE: return lowerSTORE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
- case ISD::ADD: return lowerADD(Op, DAG);
+ case ISD::BR_JT: return lowerBR_JT(Op, DAG);
+ case ISD::BRCOND: return lowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return lowerJumpTable(Op, DAG);
+ case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return lowerSETCC(Op, DAG);
+ case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
+ case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
+ case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::ADD: return lowerADD(Op, DAG);
}
return SDValue();
}
@@ -1087,358 +768,6 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
return VReg;
}
-// Get fp branch code (not opcode) from condition code.
-static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) {
- if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
- return Mips::BRANCH_T;
-
- assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
- "Invalid CondCode.");
-
- return Mips::BRANCH_F;
-}
-
-MachineBasicBlock *
-MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
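The diamond above exists because bposge32 is a branch, not a set-on-condition instruction, so materializing an i32 value from it takes two extra blocks and a PHI. What the expansion computes, in plain C:

    // FBB writes 0, TBB writes 1, and the PHI in Sink merges the two.
    static int bposge32Value(int DSPControlPos) {
      return DSPControlPos >= 32 ? 1 : 0;
    }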
-MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, sinkMBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
- .addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
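At the value level the SELECT_CC diamond emitted here is just a conditional move; a sketch of what the final PHI merges:

    #include <cstdint>

    static uint32_t selectCC(bool Cond, uint32_t TrueVal, uint32_t FalseVal) {
      // thisMBB:  branch to sinkMBB when Cond holds, keeping TrueVal live.
      // copy0MBB: the fallthrough path computes FalseVal.
      // sinkMBB:  the PHI picks whichever value reached it.
      return Cond ? TrueVal : FalseVal;
    }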
-MachineBasicBlock *MipsTargetLowering::emitSelT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, sinkMBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addReg(MI->getOperand(4).getReg());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitSeliT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, sinkMBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addImm(MI->getOperand(4).getImm());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock
- *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- unsigned regY = MI->getOperand(1).getReg();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- int64_t imm = MI->getOperand(1).getImm();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- unsigned CmpOpc;
- if (isUInt<8>(imm))
- CmpOpc = CmpiOpc;
- else if (isUInt<16>(imm))
- CmpOpc = CmpiXOpc;
- else
- llvm_unreachable("immediate field not usable");
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-static unsigned Mips16WhichOp8uOr16simm
- (unsigned shortOp, unsigned longOp, int64_t Imm) {
- if (isUInt<8>(Imm))
- return shortOp;
- else if (isInt<16>(Imm))
- return longOp;
- else
- llvm_unreachable("immediate field not usable");
-}
-
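Mips16WhichOp8uOr16simm makes the usual Mips16 choice between a short encoding and an EXTEND-prefixed one based on the immediate's range. A standalone restatement (note that the original genuinely mixes an unsigned 8-bit test with a signed 16-bit one):

    #include <cstdint>
    #include <cstdlib>

    static unsigned pickImmOpcode(unsigned ShortOp, unsigned LongOp, int64_t Imm) {
      if (Imm >= 0 && Imm <= 0xff)               // isUInt<8>: short form
        return ShortOp;
      if (Imm >= INT16_MIN && Imm <= INT16_MAX)  // isInt<16>: extended form
        return LongOp;
      abort();                                   // immediate field not usable
    }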
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- unsigned regY = MI->getOperand(2).getReg();
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- int64_t Imm = MI->getOperand(2).getImm();
- unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addImm(Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -1548,77 +877,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
return emitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return emitBPOSGE32(MI, BB);
- case Mips::SelBeqZ:
- return emitSel16(Mips::BeqzRxImm16, MI, BB);
- case Mips::SelBneZ:
- return emitSel16(Mips::BnezRxImm16, MI, BB);
- case Mips::SelTBteqZCmpi:
- return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBteqZSlti:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBteqZSltiu:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBtneZCmpi:
- return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBtneZSlti:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBtneZSltiu:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBteqZCmp:
- return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBteqZSlt:
- return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBteqZSltu:
- return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::SelTBtneZCmp:
- return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBtneZSlt:
- return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBtneZSltu:
- return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BteqzT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::BteqzT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BtnezT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BtnezT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::BtnezT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- break;
- case Mips::SltCCRxRy16:
- return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
- break;
- case Mips::SltiCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SltiuCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SltuCCRxRy16:
- return emitFEXT_CCRX16_ins
- (Mips::SltuRxRy16, MI, BB);
}
}
@@ -2140,8 +1398,8 @@ lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
- SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32);
-
+ unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
+ SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
Dest, CondRes);
}
@@ -2792,6 +2050,22 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
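These two helpers are the bridge between i64 SSA values and the untyped HI/LO accumulator. A standalone model of the round-trip, using plain structs in place of the LLVM node types:

    #include <cassert>
    #include <cstdint>

    struct LoHi { uint32_t Lo, Hi; }; // stands in for the untyped accumulator

    static LoHi initAccumulatorModel(uint64_t In) {
      return { (uint32_t)In, (uint32_t)(In >> 32) }; // EXTRACT_ELEMENT 0 and 1
    }

    static uint64_t extractLOHIModel(LoHi Acc) {
      return ((uint64_t)Acc.Hi << 32) | Acc.Lo;      // BUILD_PAIR of sub_lo, sub_hi
    }

    int main() {
      uint64_t V = 0x0123456789abcdefULL;
      assert(extractLOHIModel(initAccumulatorModel(V)) == V);
      return 0;
    }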
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
@@ -2804,48 +2078,51 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
-static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG,
- unsigned Opc, bool HasI64In, bool HasI64Out) {
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
DebugLoc DL = Op.getDebugLoc();
bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
- SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
SmallVector<SDValue, 3> Ops;
+ unsigned OpNo = 0;
- if (HasI64In) {
- SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(0, MVT::i32));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(1, MVT::i32));
+ // See if Op has a chain input.
+ if (HasChainIn)
+ Ops.push_back(Op->getOperand(OpNo++));
- Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
- Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
+ // The next operand is the intrinsic opcode.
+ assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
- Ops.push_back(Chain.getValue(1));
- } else {
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
- }
+ // See if the next operand has type i64.
+ SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+ if (Opnd.getValueType() == MVT::i64)
+ In64 = initAccumulator(Opnd, DL, DAG);
+ else
+ Ops.push_back(Opnd);
+
+ // Push the remaining operands.
+ for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+ Ops.push_back(Op->getOperand(OpNo));
+
+ // Add In64 to the end of the list.
+ if (In64.getNode())
+ Ops.push_back(In64);
- if (!HasI64Out)
- return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
- Ops.begin(), Ops.size());
+ // Scan output.
+ SmallVector<EVT, 2> ResTys;
- SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
- Ops.begin(), Ops.size());
- SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
- Intr.getValue(1));
- SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
- OutLo.getValue(2));
- SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
+ for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+ I != E; ++I)
+ ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+ // Create node.
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
if (!HasChainIn)
return Out;
- SDValue Vals[] = { Out, OutHi.getValue(1) };
+ assert(Val->getValueType(1) == MVT::Other);
+ SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
return DAG.getMergeValues(Vals, 2, DL);
}
@@ -2855,37 +2132,37 @@ SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return SDValue();
case Intrinsic::mips_shilo:
- return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
case Intrinsic::mips_dpau_h_qbl:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
case Intrinsic::mips_dpau_h_qbr:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
case Intrinsic::mips_dpsu_h_qbl:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
case Intrinsic::mips_dpsu_h_qbr:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
case Intrinsic::mips_dpa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
case Intrinsic::mips_dps_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
case Intrinsic::mips_dpax_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
case Intrinsic::mips_dpsx_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
case Intrinsic::mips_mulsa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
case Intrinsic::mips_mult:
- return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Mult);
case Intrinsic::mips_multu:
- return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Multu);
case Intrinsic::mips_madd:
- return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
case Intrinsic::mips_maddu:
- return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
case Intrinsic::mips_msub:
- return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSub);
case Intrinsic::mips_msubu:
- return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
}
}
@@ -2895,45 +2172,45 @@ SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
default:
return SDValue();
case Intrinsic::mips_extp:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
case Intrinsic::mips_extpdp:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
case Intrinsic::mips_extr_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
case Intrinsic::mips_extr_r_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
case Intrinsic::mips_extr_rs_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
case Intrinsic::mips_extr_s_h:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
case Intrinsic::mips_mthlip:
- return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
case Intrinsic::mips_mulsaq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
case Intrinsic::mips_maq_s_w_phl:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
case Intrinsic::mips_maq_s_w_phr:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
case Intrinsic::mips_maq_sa_w_phl:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
case Intrinsic::mips_maq_sa_w_phr:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
case Intrinsic::mips_dpaq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
case Intrinsic::mips_dpsq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
case Intrinsic::mips_dpaq_sa_l_w:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
case Intrinsic::mips_dpsq_sa_l_w:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
case Intrinsic::mips_dpaqx_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
case Intrinsic::mips_dpaqx_sa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
case Intrinsic::mips_dpsqx_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
case Intrinsic::mips_dpsqx_sa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
}
}
@@ -3074,28 +2351,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// isEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
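The removed predicate is a flat conjunction that the header change below turns into a subtarget hook; condensed into standalone form:

    // Tail calls only when the option is enabled, not in Mips16 mode, no
    // byval argument on either side, and the callee's argument area fits
    // within the caller's incoming one.
    static bool eligibleForTailCall(bool OptEnabled, bool InMips16Mode,
                                    bool HasByValArg, unsigned NextStackOffset,
                                    unsigned CallerIncomingArgSize) {
      return OptEnabled && !InMips16Mode && !HasByValArg &&
             NextStackOffset <= CallerIncomingArgSize;
    }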
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -3114,161 +2369,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
-//
-// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
-// cleaner way to do all of this but it will have to wait until the traditional
-// gcc mechanism is completed.
-//
-// For PIC, in order for Mips16 code to call Mips32 code which, according to
-// the ABI, has either arguments or return values placed in floating point
-// registers, we use a set of helper functions. (This includes functions
-// which return complex types, which on Mips are returned in a pair of
-// floating point registers.)
-//
-// This is an encoding that we inherited from gcc.
-// In the traditional Mips O32 and N32 ABIs, floating point numbers are passed
-// in floating point argument registers 1,2 only when the first and optionally
-// the second arguments are float (sf) or double (df).
-// For Mips16 we are only concerned with the situations where floating point
-// arguments are passed in floating point registers by the ABI, because Mips16
-// mode code cannot execute floating point instructions to load those values
-// and hence helper functions are needed.
-// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
-// and the helper function suffixes for these are:
-//   0, 1, 5, 9, 2, 6, 10
-// This suffix can be calculated as follows, for a given argument Arg:
-//   Arg1x, Arg2x = 1 : Arg is sf
-//                  2 : Arg is df
-//                  0 : Arg is neither sf nor df
-// So this stub is the string for number Arg1x + Arg2x*4.
-// However, not all numbers between 0 and 10 are possible; we check anyway
-// and assert if an impossible one occurs.
-//
-
-unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
- (ArgListTy &Args) const {
- unsigned int resultNum = 0;
- if (Args.size() >= 1) {
- Type *t = Args[0].Ty;
- if (t->isFloatTy()) {
- resultNum = 1;
- }
- else if (t->isDoubleTy()) {
- resultNum = 2;
- }
- }
- if (resultNum) {
- if (Args.size() >=2) {
- Type *t = Args[1].Ty;
- if (t->isFloatTy()) {
- resultNum += 4;
- }
- else if (t->isDoubleTy()) {
- resultNum += 8;
- }
- }
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
- return resultNum;
-}
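A compile-time check of the suffix arithmetic implemented above (1 for sf, 2 for df, second argument scaled by 4; a zero first code means no helper is needed at all):

    constexpr unsigned stubNumber(unsigned Arg1x, unsigned Arg2x) {
      return Arg1x ? Arg1x + Arg2x * 4 : 0;
    }
    static_assert(stubNumber(1, 2) == 9,  "(sf, df)");
    static_assert(stubNumber(2, 1) == 6,  "(df, sf)");
    static_assert(stubNumber(2, 2) == 10, "(df, df)");
    static_assert(stubNumber(0, 2) == 0,  "first arg neither sf nor df");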
-//
-// Prefixes are attached to stub numbers depending on the return type:
-//   return type: float          sf_
-//                double         df_
-//                single complex sc_
-//                double complex dc_
-//                others         NO PREFIX
-//
-// The full name of a helper function is __mips16_call_stub +
-// return type dependent prefix + stub number.
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently, but my main purpose is to not waste runtime
-// on something that we can enumerate in the source. Another possibility is
-// to have a python script generate these mapping tables. This will do
-// for now. There is a whole series of helper function mapping arrays, one
-// for each return type class as outlined above. There are 11 possible
-// entries; entries with 0 are ones which should never be selected.
-//
-// All the arrays are similar except for the ones whose functions return
-// neither sf, df, sc, nor dc; for those we only care about calls that have
-// sf or df as the first parameter.
-//
-#define P_ "__mips16_call_stub_"
-#define MAX_STUB_NUMBER 10
-#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
-#define T P "0" , T1
-#define P P_
-static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
-#undef P
-#define P P_ "sf_"
-static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "df_"
-static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "sc_"
-static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "dc_"
-static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#undef P_
-
-
-const char* MipsTargetLowering::
- getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
- const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
-#ifndef NDEBUG
- const unsigned int maxStubNum = 10;
- assert(stubNum <= maxStubNum);
- const bool validStubNum[maxStubNum+1] =
- {true, true, true, false, false, true, true, false, false, true, true};
- assert(validStubNum[stubNum]);
-#endif
- const char *result;
- if (RetTy->isFloatTy()) {
- result = sfMips16Helper[stubNum];
- }
- else if (RetTy ->isDoubleTy()) {
- result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
- // check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
- result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
- result = dcMips16Helper[stubNum];
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- if (stubNum == 0) {
- needHelper = false;
- return "";
- }
- result = vMips16Helper[stubNum];
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
- needHelper = true;
- return result;
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
}
/// LowerCall - function arguments are copied from virtual regs to
@@ -3287,26 +2429,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
- const char* Mips16HelperFunction = 0;
- bool NeedMips16Helper = false;
-
- if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat &&
- Mips16HardFloat) {
- //
- // Currently we don't have symbols tagged with the mips16 or mips32
- // qualifier, so we will assume that we don't know what kind it is
- // and generate the helper.
- //
- bool LookupHelper = true;
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
- LookupHelper = false;
- }
- }
- if (LookupHelper) Mips16HelperFunction =
- getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
-
- }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
@@ -3466,80 +2588,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GlobalOrExternal = true;
}
- SDValue JumpTarget = Callee;
-
- // T9 should contain the address of the callee function if
- // -relocation-model=pic is used or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- unsigned V0Reg = Mips::V0;
- if (NeedMips16Helper) {
- RegsToPass.push_front(std::make_pair(V0Reg, Callee));
- JumpTarget = DAG.getExternalSymbol(
- Mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
- }
- else {
- RegsToPass.push_front(std::make_pair(T9Reg, Callee));
-
- if (!Subtarget->inMips16Mode())
- JumpTarget = SDValue();
- }
- }
-
- // Insert node "GP copy globalreg" before call to function.
- //
- // R_MIPS_CALL* operators (emitted when non-internal functions are called
- // in PIC mode) allow symbols to be resolved via lazy binding.
- // The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
- // The InFlag is necessary since all emitted instructions must be
- // stuck together.
- SDValue InFlag;
-
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops(1, Chain);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (JumpTarget.getNode())
- Ops.push_back(JumpTarget);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
if (IsTailCall)
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -4124,14 +3183,14 @@ static bool isF128SoftLibCall(const char *CallSym) {
const char * const *End = LibCalls + array_lengthof(LibCalls);
// Check that LibCalls is sorted alphabetically.
-#ifndef NDEBUG
- LTStr Comp;
+ MipsTargetLowering::LTStr Comp;
+#ifndef NDEBUG
for (const char * const *I = LibCalls; I < End - 1; ++I)
assert(Comp(*I, *(I + 1)));
#endif
- return std::binary_search(LibCalls, End, CallSym, LTStr());
+ return std::binary_search(LibCalls, End, CallSym, Comp);
}
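With LTStr hoisted into the class, the function keeps the standard sorted-table-plus-binary_search idiom; a self-contained illustration with placeholder table contents:

    #include <algorithm>
    #include <cstring>

    struct LTStr {
      bool operator()(const char *LHS, const char *RHS) const {
        return std::strcmp(LHS, RHS) < 0;
      }
    };

    // Must stay sorted under LTStr; that is what the NDEBUG loop verifies.
    static const char *const Names[] = {"__addtf3", "__divtf3", "__multf3"};

    static bool isListed(const char *Sym) {
      return std::binary_search(Names, Names + 3, Sym, LTStr());
    }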
/// This function returns true if Ty is fp128 or i128 which was originally a
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index de925e16ab..cab71a61e0 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -68,6 +68,16 @@ namespace llvm {
EH_RETURN,
+ // Node used to extract an integer from the accumulator.
+ ExtractLOHI,
+
+ // Node used to insert integers into the accumulator.
+ InsertLOHI,
+
+ // Mult nodes.
+ Mult,
+ Multu,
+
// MAdd/Sub nodes
MAdd,
MAddu,
@@ -77,6 +87,8 @@ namespace llvm {
// DivRem(u)
DivRem,
DivRemU,
+ DivRem16,
+ DivRemU16,
BuildPairF64,
ExtractElementF64,
@@ -152,9 +164,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -177,17 +189,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
- void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
- void setMips16HardFloatLibCalls();
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- unsigned int
- getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
- const char *getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
+
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -283,6 +312,7 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -321,9 +351,10 @@ namespace llvm {
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -377,10 +408,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -419,8 +446,6 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
@@ -430,29 +455,11 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const;
-
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
}
#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 891bdc1224..6b23057c9c 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -503,32 +503,27 @@ let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
-// Load/Store patterns.
+// Patterns for loads/stores with a reg+imm operand.
let AddedComplexity = 40 in {
let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>;
- def : MipsPat<(store FGR32:$v, addrRegImm:$a),
- (SWC1_P8 FGR32:$v, addrRegImm:$a)>;
- def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>;
- def : MipsPat<(store FGR64:$v, addrRegImm:$a),
- (SDC164_P8 FGR64:$v, addrRegImm:$a)>;
+ def : LoadRegImmPat<LWC1_P8, f32, load>;
+ def : StoreRegImmPat<SWC1_P8, f32>;
+ def : LoadRegImmPat<LDC164_P8, f64, load>;
+ def : StoreRegImmPat<SDC164_P8, f64>;
}
let Predicates = [NotN64, HasStdEnc] in {
- def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>;
- def : MipsPat<(store FGR32:$v, addrRegImm:$a),
- (SWC1 FGR32:$v, addrRegImm:$a)>;
+ def : LoadRegImmPat<LWC1, f32, load>;
+ def : StoreRegImmPat<SWC1, f32>;
}
let Predicates = [NotN64, HasMips64, HasStdEnc] in {
- def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>;
- def : MipsPat<(store FGR64:$v, addrRegImm:$a),
- (SDC164 FGR64:$v, addrRegImm:$a)>;
+ def : LoadRegImmPat<LDC164, f64, load>;
+ def : StoreRegImmPat<SDC164, f64>;
}
let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>;
- def : MipsPat<(store AFGR64:$v, addrRegImm:$a),
- (SDC1 AFGR64:$v, addrRegImm:$a)>;
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
}
}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 3cd9088140..8c05d97bea 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -86,6 +86,36 @@ public:
/// Return the number of bytes of code the specified instruction may be.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
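The new spill/reload hooks follow a simple delegation pattern: the standard slot entry points forward to a single offset-taking pure virtual, so each subtarget implements the logic exactly once. The shape in miniature, with our own names:

    struct SpillerBase {
      virtual ~SpillerBase() {}
      // Fixed entry point used by generic code: always offset 0.
      void storeToSlot(int FrameIndex) { storeToStack(FrameIndex, /*Offset=*/0); }
      // The one hook each subtarget overrides.
      virtual void storeToStack(int FrameIndex, long Offset) = 0;
    };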
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 25b5d240be..3a82e81713 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,13 +23,16 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
- [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisSameAs<2, 3>]>;
-def SDT_MipsDivRem : SDTypeProfile<0, 2,
- [SDTCisInt<0>,
- SDTCisSameAs<0, 1>]>;
+def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
+ [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
@@ -82,20 +85,27 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPSideEffect,
SDNPOptInGlue, SDNPOutGlue]>;
+// Node used to extract an integer from the LO/HI register pair.
+def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+
+// Node used to insert 32-bit integers into the LO/HI register pair.
+def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+
+// Mult nodes.
+def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
+def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>;
+
// MAdd*/MSub* nodes
-def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>;
// DivRem(u) nodes
-def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
- [SDNPOutGlue]>;
-def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>;
+def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>;
+def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
[SDNPOutGlue]>;
// Target constant nodes that are not part of any isel patterns and remain
@@ -256,6 +266,7 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem64 : Operand<i64> {
@@ -263,18 +274,21 @@ def mem64 : Operand<i64> {
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea_64 : Operand<i64> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
// size operand of ext instruction
@@ -378,10 +392,9 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
}
// Arithmetic Multiply ADD/SUB
-class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> :
+class MArithR<string opstr, bit isComm = 0> :
InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
- !strconcat(opstr, "\t$rs, $rt"),
- [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> {
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> {
let Defs = [HI, LO];
let Uses = [HI, LO];
let isCommutable = isComm;
@@ -427,33 +440,39 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
// Memory Load/Store
class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
- Operand MemOpnd> :
+ Operand MemOpnd, ComplexPattern Addr> :
InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> {
+ [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let canFoldAsLoad = 1;
+ let mayLoad = 1;
}
class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
- Operand MemOpnd> :
+ Operand MemOpnd, ComplexPattern Addr> :
InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
}
multiclass LoadM<string opstr, RegisterClass RC,
- SDPatternOperator OpNode = null_frag> {
- def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
multiclass StoreM<string opstr, RegisterClass RC,
- SDPatternOperator OpNode = null_frag> {
- def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> {
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -623,11 +642,34 @@ class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
let neverHasSideEffects = 1;
}
-class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO,
+// Pseudo multiply/divide instruction with explicit accumulator register
+// operands.
+class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ bit IsComm = 1, bit HasSideEffects = 0> :
+ PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt),
+ [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>,
+ PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> {
+ let isCommutable = IsComm;
+ let hasSideEffects = HasSideEffects;
+}
+
+// Pseudo multiply add/sub instruction with explicit accumulator register
+// operands.
+class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
+ : PseudoSE<(outs ACRegs:$ac),
+ (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin),
+ [(set ACRegs:$ac,
+ (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))],
+ IIImul>,
+ PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> {
+ string Constraints = "$acin = $ac";
+}
+
+class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
- InstSE<(outs), (ins RO:$rs, RO:$rt),
- !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin,
- FrmR> {
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
+ [], itin, FrmR> {
let Defs = DefRegs;
}
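For context, a sketch (not part of the patch itself) of what the new MultDivPseudo carries: both results of the division below come from a single DivRem node, and PseudoSDIV's untyped $ac result holds the quotient (LO) and remainder (HI) together until the ExtractLOHI patterns further down split them.

    // Hypothetical C++ example: on mips32 both statements select to one
    // PseudoSDIV; LO holds the quotient and HI the remainder.
    void divrem(int a, int b, int *q, int *r) {
      *q = a / b; // extracted from the LO half of $ac
      *r = a % b; // extracted from the HI half of $ac
    }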
@@ -790,6 +832,14 @@ let usesCustomInserter = 1 in {
defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
}
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>;
+ defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
+}
+
+def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
+
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -845,10 +895,10 @@ let Predicates = [HasMips32r2, HasStdEnc] in {
/// Load and Store Instructions
/// aligned
defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>;
-defm LH : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
-defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>;
+defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
@@ -920,10 +970,13 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
/// Multiply and Divide Instructions.
def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
-def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>,
- MULT_FM<0, 0x1a>;
-def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>,
- MULT_FM<0, 0x1b>;
+def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
+def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>;
+def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv,
+ 0>;
def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
@@ -951,10 +1004,14 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>;
-def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>;
-def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>;
-def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>;
+def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
@@ -997,6 +1054,9 @@ def : InstAlias<"and $rs, $rt, $imm",
def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
Requires<[NotMips64]>;
def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
def : InstAlias<"not $rt, $rs",
(NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
def : InstAlias<"neg $rt, $rs",
@@ -1008,6 +1068,9 @@ def : InstAlias<"slt $rs, $rt, $imm",
def : InstAlias<"xor $rs, $rt, $imm",
(XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
Requires<[NotMips64]>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
def : InstAlias<"mfc0 $rt, $rd",
(MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
@@ -1043,6 +1106,13 @@ def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>;
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
+// Load/store pattern templates.
+class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
+ MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
+ MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
// Small immediates
def : MipsPat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
@@ -1220,6 +1290,24 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// bswap pattern
def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+// mflo/hi patterns.
+def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
+
+// Load halfword/word patterns.
+let AddedComplexity = 40 in {
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu, i32, zextloadi8>;
+ def : LoadRegImmPat<LH, i32, sextloadi16>;
+ def : LoadRegImmPat<LW, i32, load>;
+ }
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
+ def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
+ def : LoadRegImmPat<LW_P8, i32, load>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 13b2a6ac17..5ed5124139 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,7 +58,8 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -67,6 +68,9 @@ public:
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+ /// \brief Return GPR register class.
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f93dd86c17..64458bcef7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,6 +18,10 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
class MipsReg<bits<16> Enc, string n> : Register<n> {
let HWEncoding = Enc;
@@ -54,6 +58,13 @@ class AFPR64<bits<16> Enc, string n, list<Register> subregs>
let SubRegIndices = [sub_32];
}
+// Accumulator Registers
+class ACC<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
+}
+
// Mips Hardware Registers
class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
@@ -219,7 +230,13 @@ let Namespace = "Mips" in {
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
+ def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
+ def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
def LO : Register<"lo">, DwarfRegNum<[65]>;
+ def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
+ def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
+ def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
def HI64 : RegisterWithSubRegs<"hi", [HI]>;
@@ -240,11 +257,12 @@ let Namespace = "Mips" in {
def HWR29_64 : MipsReg<29, "29">;
// Accum registers
- let SubRegIndices = [sub_lo, sub_hi] in
- def AC0 : MipsRegWithSubRegs<0, "ac0", [LO, HI]>;
- def AC1 : MipsReg<1, "ac1">;
- def AC2 : MipsReg<2, "ac2">;
- def AC3 : MipsReg<3, "ac3">;
+ def AC0 : ACC<0, "ac0", [LO, HI]>;
+ def AC1 : ACC<1, "ac1", [LO1, HI1]>;
+ def AC2 : ACC<2, "ac2", [LO2, HI2]>;
+ def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+
+ def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
def DSPCtrl : Register<"dspctrl">;
}
@@ -291,9 +309,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -319,18 +337,28 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+ let Size = 64;
+}
+
+def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+ let Size = 128;
+}
+
+def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+ let Size = 64;
+}
def CPURegsAsmOperand : AsmOperandClass {
let Name = "CPURegsAsm";
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 0dd671376f..68ec921888 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -29,6 +29,155 @@
using namespace llvm;
+namespace {
+typedef MachineBasicBlock::iterator Iter;
+
+/// Helper class to expand accumulator pseudos.
+class ExpandACCPseudo {
+public:
+ ExpandACCPseudo(MachineFunction &MF);
+ bool expand();
+
+private:
+ bool expandInstr(MachineBasicBlock &MBB, Iter I);
+ void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+
+ MachineFunction &MF;
+ const MipsSEInstrInfo &TII;
+ const MipsRegisterInfo &RegInfo;
+ MachineRegisterInfo &MRI;
+};
+}
+
+ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ : MF(MF_),
+ TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
+ RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
+
+bool ExpandACCPseudo::expand() {
+ bool Expanded = false;
+
+ for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
+ BB != BBEnd; ++BB)
+ for (Iter I = BB->begin(), End = BB->end(); I != End;)
+ Expanded |= expandInstr(*BB, I++);
+
+ return Expanded;
+}
+
+bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+ switch(I->getOpcode()) {
+ case Mips::LOAD_AC64:
+ case Mips::LOAD_AC64_P8:
+ case Mips::LOAD_AC_DSP:
+ case Mips::LOAD_AC_DSP_P8:
+ expandLoad(MBB, I, 4);
+ break;
+ case Mips::LOAD_AC128:
+ case Mips::LOAD_AC128_P8:
+ expandLoad(MBB, I, 8);
+ break;
+ case Mips::STORE_AC64:
+ case Mips::STORE_AC64_P8:
+ case Mips::STORE_AC_DSP:
+ case Mips::STORE_AC_DSP_P8:
+ expandStore(MBB, I, 4);
+ break;
+ case Mips::STORE_AC128:
+ case Mips::STORE_AC128_P8:
+ expandStore(MBB, I, 8);
+ break;
+ case Mips::COPY_AC64:
+ case Mips::COPY_AC_DSP:
+ expandCopy(MBB, I, 4);
+ break;
+ case Mips::COPY_AC128:
+ expandCopy(MBB, I, 8);
+ break;
+ default:
+ return false;
+ }
+
+ MBB.erase(I);
+ return true;
+}
+
+void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // load $vr0, FI
+ // copy lo, $vr0
+ // load $vr1, FI + 4
+ // copy hi, $vr1
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+
+ TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill);
+ TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize);
+ BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
+}
+
+void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, lo
+ // store $vr0, FI
+ // copy $vr1, hi
+ // store $vr1, FI + 4
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
+ unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
+}
+
+void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, src_lo
+ // copy dst_lo, $vr0
+ // copy $vr1, src_hi
+ // copy dst_hi, $vr1
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
+ unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
+ .addReg(VR0, RegState::Kill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
+ .addReg(VR1, RegState::Kill);
+}
+
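A sketch of the spill-slot layout the load/store expansions assume (AccSlot and reloadAcc are hypothetical names; the offsets mirror the FI and FI + RegSize arguments above):

    #include <cstdint>
    #include <cstring>

    // Hypothetical model of a 64-bit accumulator spill slot on mips32:
    // lo word at offset 0, hi word at offset RegSize (4).
    struct AccSlot { uint32_t Words[2]; };

    uint64_t reloadAcc(const AccSlot &S) {
      uint32_t Lo, Hi;
      std::memcpy(&Lo, &S.Words[0], sizeof(Lo)); // load $vr0, FI
      std::memcpy(&Hi, &S.Words[1], sizeof(Hi)); // load $vr1, FI + 4
      return ((uint64_t)Hi << 32) | Lo;
    }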
unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
static const unsigned EhDataReg[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
@@ -246,7 +395,10 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  // Reserve call frame if the size of the maximum call frame fits into the
  // 16-bit immediate field and there are no variable-sized objects on the
  // stack.
- return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+ // Make sure the second register scavenger spill slot can be accessed with one
+ // instruction.
+ return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
+ !MFI->hasVarSizedObjects();
}
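To make the tightened bound concrete, a small standalone check (a sketch; the 16-byte figure assumes the mips64 stack alignment set in the MipsSEFrameLowering constructor change below):

    #include <cassert>
    #include <cstdint>

    // isInt<16>-style check: a single lw/sw reaches frame offsets in
    // [-32768, 32767].
    bool fitsSImm16(int64_t V) { return V >= -32768 && V <= 32767; }

    int main() {
      // With the alignment pad added, 32752 + 16 == 32768 no longer fits,
      // so the call frame is not reserved and the second scavenging slot
      // stays reachable in a single instruction.
      assert(fitsSImm16(32752) && !fitsSImm16(32752 + 16));
      return 0;
    }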
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
@@ -284,6 +436,18 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (MipsFI->callsEhReturn())
MipsFI->createEhDataRegsFI();
+ // Expand pseudo instructions which load, store or copy accumulators.
+ // Add an emergency spill slot if a pseudo was expanded.
+ if (ExpandACCPseudo(MF).expand()) {
+ // The spill slot should be half the size of the accumulator. If the
+ // target is mips64, it should be 64-bit; otherwise it should be 32-bit.
+ const TargetRegisterClass *RC = STI.hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+ }
+
// Set scavenging frame index if necessary.
uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
estimateStackSize(MF);
@@ -295,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
&Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
- RS->setScavengingFrameIndex(FI);
+ RS->addScavengingFrameIndex(FI);
}
const MipsFrameLowering *
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 7becd25455..193a66cc65 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,7 +21,7 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 0000000000..d6d220750c
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,473 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a PHI's operand or is tied to a def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // The GNU linker requires that the first two instructions appear at the
+ // beginning of a function and no instructions be inserted before or
+ // between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
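A side note on the %hi/%lo pairs used throughout this sequence, as a standalone arithmetic sketch (independent of the patch): addiu sign-extends its 16-bit immediate, so %hi must absorb a carry whenever the low half's top bit is set.

    #include <cassert>
    #include <cstdint>

    // (hi << 16) + signext(lo) reconstructs the address; adding 0x8000
    // before shifting folds in the carry from a negative lo half.
    uint32_t hiPart(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
    int16_t loPart(uint32_t Addr) { return (int16_t)(Addr & 0xffff); }

    int main() {
      uint32_t A = 0x1234abcd;
      assert((hiPart(A) << 16) + loPart(A) == A);
      return 0;
    }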
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
+
+/// ComplexPattern used on MipsInstrInfo to select addresses for Mips
+/// load/store instructions.
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // If the address is a frame index, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+ // In PIC code, the global address is wrapped; use the wrapper's operands.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
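Since selectAddrDefault unconditionally succeeds, selectIntAddr's or-chain means the reg+imm form is preferred and the register-plus-zero form is the catch-all. A sketch of that fallback idiom with hypothetical stand-in types:

    #include <cstdint>

    struct Addr { uint64_t BaseReg; int16_t Imm; };

    // Tight form: base register plus a signed 16-bit displacement.
    bool selectRegImm(uint64_t Reg, int64_t Off, Addr &Out) {
      if (Off < -32768 || Off > 32767)
        return false;
      Out = Addr{Reg, (int16_t)Off};
      return true;
    }

    // Catch-all: the full address is assumed to be in a register.
    bool selectDefault(uint64_t AddrReg, Addr &Out) {
      Out = Addr{AddrReg, 0};
      return true;
    }

    bool selectIntAddr(uint64_t Reg, int64_t Off, Addr &Out) {
      return selectRegImm(Reg, Off, Out) || selectDefault(Reg + Off, Out);
    }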
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three-operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+
+ case MipsISD::InsertLOHI: {
+ unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
+ Mips::ACRegsRegClassID;
+ SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
+ SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
+ SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
+ const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
+ Node->getOperand(1), HiIdx };
+ SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops, 5);
+ return std::make_pair(true, Res);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 0000000000..6137ab040b
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 0000000000..4f219218d3
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,442 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+
+ if (HasMips64)
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch(Op.getOpcode()) {
+ case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
+ case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
+ case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
+ case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
+ case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
+ case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
+ case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+ }
+
+ return MipsTargetLowering::LowerOperation(Op, DAG);
+}
+
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = ADDENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
+
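For reference, a hedged example of source that produces the pattern selectMADD matches: on mips32, the 64-bit accumulation below legalizes to umul_lohi feeding an addc/adde pair, which this combine folds into a single maddu.

    #include <cstdint>

    // acc += (uint64_t)a * b: the product's halves become
    // (addc multLo, Lo0) / (adde multHi, Hi0) -> one maddu.
    uint64_t macWord(uint64_t acc, uint32_t a, uint32_t b) {
      return acc + (uint64_t)a * b;
    }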
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = SUBENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+ // create MipsMSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue
+MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
+ default:
+ return MipsTargetLowering::PerformDAGCombine(N, DCI);
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
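Restated as a standalone predicate (a sketch with hypothetical parameter names), the rule is: the flag must be on, neither side may use byval arguments, and the callee's argument area must fit inside the caller's incoming one.

    // Hypothetical restatement of isEligibleForTailCallOptimization.
    bool eligibleForTailCall(bool enabled, bool calleeHasByVal,
                             bool callerHasByVal, unsigned nextStackOffset,
                             unsigned incomingArgSize) {
      if (!enabled || calleeHasByVal || callerHasByVal)
        return false;
      return nextStackOffset <= incomingArgSize;
    }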
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
+ bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const {
+ EVT Ty = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Lo, Hi;
+
+ if (HasLo)
+ Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ if (HasHi)
+ Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+
+ if (!HasLo || !HasHi)
+ return HasLo ? Lo : Hi;
+
+ SDValue Vals[] = { Lo, Hi };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
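A usage note, with hedged C examples: when both halves are live the node keeps two ExtractLOHI results; when only one is live the merge is skipped and a single extract is returned.

    #include <cstdint>

    // SMUL_LOHI: both halves live -> one MipsISD::Mult, two extracts.
    int64_t widemul(int32_t a, int32_t b) {
      return (int64_t)a * b;
    }

    // MULHS: only the high half live -> same Mult, one sub_hi extract.
    int32_t mulhs(int32_t a, int32_t b) {
      return (int32_t)(((int64_t)a * b) >> 32);
    }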
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 0000000000..186f6a343d
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,62 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
+ &Mips::ACRegsRegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+ }
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index a9809ef712..9d08172081 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
+ else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC64;
+ else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC_DSP;
+ else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC128;
assert(Opc && "Cannot copy registers");
@@ -152,10 +158,10 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void MipsSEInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void MipsSEInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
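
The rename from storeRegToStackSlot/loadRegFromStackSlot to storeRegToStack/loadRegFromStack threads an explicit int64_t Offset through to the immediate operand that was previously hard-coded to 0, which is what lets multi-word values such as the new ACC classes be spilled piecewise. A standalone model of the class-to-opcode dispatch plus offset; the opcode and class names are illustrative, not the real Mips enums:

// Toy model of the opcode selection pattern in storeRegToStack above.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum RC { CPURegs, CPU64Regs, ACRegs };
enum Opcode { SW, SD, STORE_AC64, NONE };

Opcode storeOpcodeFor(RC C) {
  switch (C) {
  case CPURegs:   return SW;
  case CPU64Regs: return SD;
  case ACRegs:    return STORE_AC64;
  }
  return NONE;
}

// Mirrors storeRegToStack: the frame index stays the base operand and
// Offset becomes the immediate, instead of the old hard-coded 0.
void storeRegToStack(unsigned SrcReg, int FI, RC C, int64_t Offset) {
  Opcode Opc = storeOpcodeFor(C);
  assert(Opc != NONE && "Register class not handled!");
  std::printf("store r%u -> fi#%d + %lld (opc %d)\n",
              SrcReg, FI, (long long)Offset, (int)Opc);
}

int main() {
  storeRegToStack(3, 0, ACRegs, 8); // e.g. second word of an ACC spill
}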
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 3e22b33ed7..0bf7876f0f 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -49,17 +49,19 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index a39b393e4e..96967380b2 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -54,6 +54,15 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
+const TargetRegisterClass *
+MipsSERegisterInfo::intRegClass(unsigned Size) const {
+ if (Size == 4)
+ return &Mips::CPURegsRegClass;
+
+ assert(Size == 8);
+ return &Mips::CPU64RegsRegClass;
+}
+
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
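
The new intRegClass hook gives frame-index elimination a way to ask for an integer register class by spill size, presumably so a scratch register of the right width can be scavenged; only 4- and 8-byte sizes are expected on this path. A tiny standalone model with stand-in class objects:

// Minimal sketch of MipsSERegisterInfo::intRegClass.
#include <cassert>

struct RegClass {};
static RegClass CPURegs, CPU64Regs; // stand-ins for the real classes

const RegClass *intRegClass(unsigned Size) {
  if (Size == 4)
    return &CPURegs;
  assert(Size == 8 && "unexpected spill size");
  return &CPU64Regs;
}

int main() { return intRegClass(8) == &CPU64Regs ? 0 : 1; }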
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index f6827e9663..2f7c37bb46 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -31,6 +31,8 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index fd930f0335..33363580ab 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index c4928c21eb..7e5f192264 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
const DataLayout DL; // Calculates type size & alignment
OwningPtr<const MipsInstrInfo> InstrInfo;
OwningPtr<const MipsFrameLowering> FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
@@ -63,7 +63,7 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
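
TLInfo changes from a by-value member to an OwningPtr filled in by MipsTargetLowering::create(*this), so the concrete lowering can be chosen at construction time instead of being fixed by the member's static type. A standalone sketch of the factory-plus-owning-pointer pattern, using std::unique_ptr in place of the era's OwningPtr and a hypothetical InMips16Mode flag as the selector:

#include <cstdio>
#include <memory>

struct TargetLowering {
  virtual ~TargetLowering() {}
  virtual const char *name() const = 0;
};
struct SELowering : TargetLowering { const char *name() const override { return "SE"; } };
struct M16Lowering : TargetLowering { const char *name() const override { return "Mips16"; } };

// Stand-in for MipsTargetLowering::create(*this): picks the concrete
// lowering from a subtarget flag instead of hard-wiring one by value.
std::unique_ptr<TargetLowering> createLowering(bool InMips16Mode) {
  if (InMips16Mode)
    return std::make_unique<M16Lowering>();
  return std::make_unique<SELowering>();
}

struct TargetMachine {
  std::unique_ptr<const TargetLowering> TLInfo;
  explicit TargetMachine(bool M16) : TLInfo(createLowering(M16)) {}
  const TargetLowering *getTargetLowering() const { return TLInfo.get(); }
};

int main() { std::printf("%s\n", TargetMachine(false).getTargetLowering()->name()); }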
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 47baef6696..7da2fed4cd 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
+ NVVMReflect.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 454583850b..b3e8b5d262 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
};
const unsigned AnnotationNameLen = 8; // length of each annotation name
-const char
-PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
- "maxntidx", // PROPERTY_MAXNTID_X
- "maxntidy", // PROPERTY_MAXNTID_Y
- "maxntidz", // PROPERTY_MAXNTID_Z
- "reqntidx", // PROPERTY_REQNTID_X
- "reqntidy", // PROPERTY_REQNTID_Y
- "reqntidz", // PROPERTY_REQNTID_Z
- "minctasm", // PROPERTY_MINNCTAPERSM
- "texture", // PROPERTY_ISTEXTURE
- "surface", // PROPERTY_ISSURFACE
- "sampler", // PROPERTY_ISSAMPLER
- "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
- "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
- "kernel", // PROPERTY_ISKERNEL_FUNCTION
- "align", // PROPERTY_ALIGN
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+ "maxntidx", // PROPERTY_MAXNTID_X
+ "maxntidy", // PROPERTY_MAXNTID_Y
+ "maxntidz", // PROPERTY_MAXNTID_Z
+ "reqntidx", // PROPERTY_REQNTID_X
+ "reqntidy", // PROPERTY_REQNTID_Y
+ "reqntidz", // PROPERTY_REQNTID_Z
+ "minctasm", // PROPERTY_MINNCTAPERSM
+ "texture", // PROPERTY_ISTEXTURE
+ "surface", // PROPERTY_ISSURFACE
+ "sampler", // PROPERTY_ISSAMPLER
+ "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+ "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "kernel", // PROPERTY_ISKERNEL_FUNCTION
+ "align", // PROPERTY_ALIGN
- // last property
- "proplast", // PROPERTY_LAST
+ // last property
+ "proplast", // PROPERTY_LAST
};
// name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
// compiling those .cpp files, hence __attribute__((unused)).
__attribute__((unused))
#endif
-static const char* NamedMDForAnnotations = "nvvm.annotations";
+ static const char *NamedMDForAnnotations = "nvvm.annotations";
}
-
#endif
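
PropertyAnnotationNames is a name table indexed directly by the PropertyAnnotation enum, so the fragile invariant is that table and enum stay the same length. A sketch of that pattern with a compile-time size guard; the enum here is a reduced stand-in, not the real list:

enum Property { P_MAXNTID_X, P_KERNEL, P_ALIGN, P_LAST };

static const char *const PropertyNames[] = {
  "maxntidx", "kernel", "align", "proplast",
};
static_assert(sizeof(PropertyNames) / sizeof(PropertyNames[0]) == P_LAST + 1,
              "name table out of sync with enum");

const char *nameOf(Property P) { return PropertyNames[P]; }
int main() { return nameOf(P_KERNEL)[0] == 'k' ? 0 : 1; }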
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 619181994a..459cd96cb0 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -23,10 +23,9 @@ bool CompileForDebugging;
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
- cl::location(CompileForDebugging),
- cl::init(false));
+ cl::location(CompileForDebugging), cl::init(false));
-void NVPTXMCAsmInfo::anchor() { }
+void NVPTXMCAsmInfo::anchor() {}
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
@@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Data32bitsDirective = " .b32 ";
Data64bitsDirective = " .b64 ";
PrivateGlobalPrefix = "";
- ZeroDirective = " .b8";
+ ZeroDirective = " .b8";
AsciiDirective = " .b8";
AscizDirective = " .b8";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 44aa01ca6e..ccd29705df 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -28,7 +28,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
-
using namespace llvm;
static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+ StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
-
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
// Register the MC asm info.
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index b5684883fc..d6c79b5110 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_SUPPORT_MANAGED_STRING_H
#define LLVM_SUPPORT_MANAGED_STRING_H
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index b46ea881c4..6a53a443bf 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -41,18 +41,24 @@ enum CondCodes {
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
switch (CC) {
- case NVPTXCC::NE: return "ne";
- case NVPTXCC::EQ: return "eq";
- case NVPTXCC::LT: return "lt";
- case NVPTXCC::LE: return "le";
- case NVPTXCC::GT: return "gt";
- case NVPTXCC::GE: return "ge";
+ case NVPTXCC::NE:
+ return "ne";
+ case NVPTXCC::EQ:
+ return "eq";
+ case NVPTXCC::LT:
+ return "lt";
+ case NVPTXCC::LE:
+ return "le";
+ case NVPTXCC::GT:
+ return "gt";
+ case NVPTXCC::GE:
+ return "ge";
}
llvm_unreachable("Unknown condition code");
}
-FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
-namespace NVPTX
-{
+namespace NVPTX {
enum DrvInterface {
NVCL,
CUDA,
@@ -102,7 +107,7 @@ enum LoadStore {
};
namespace PTXLdStInstCode {
-enum AddressSpace{
+enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
CONSTANT = 2,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 7aee3595c6..d78b4e81a3 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td"
//===----------------------------------------------------------------------===//
// SM Versions
-def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10",
- "Target SM 1.0">;
-def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11",
- "Target SM 1.1">;
-def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12",
- "Target SM 1.2">;
-def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13",
- "Target SM 1.3">;
def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
"Target SM 2.0">;
def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
@@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"sm_10", [SM10]>;
-def : Proc<"sm_11", [SM11]>;
-def : Proc<"sm_12", [SM12]>;
-def : Proc<"sm_13", [SM13]>;
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 60f52a46da..0f792ec682 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -19,9 +19,9 @@
namespace llvm {
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
- bool functionModified = false;
- Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
}
char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
- "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+ "blocks to the entry block");
-FunctionPass *createAllocaHoisting() {
- return new NVPTXAllocaHoisting();
-}
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
} // end namespace llvm
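
The pass registered above hoists alloca instructions out of non-entry blocks into the entry block, splicing each one in before the entry block's terminator. A toy standalone model over a vector-of-strings IR; the real pass moves AllocaInst nodes, but the insert-before-terminator rule is the same:

#include <cstdio>
#include <string>
#include <vector>

struct Block { std::vector<std::string> Insts; };

// Move every "alloca" found in a non-entry block to the entry block,
// just before the entry block's terminator (its last instruction).
bool hoistAllocas(std::vector<Block> &Fn) {
  bool Changed = false;
  for (size_t B = 1; B < Fn.size(); ++B) { // skip entry block
    auto &Insts = Fn[B].Insts;
    for (size_t I = 0; I < Insts.size();) {
      if (Insts[I] == "alloca") {
        Fn[0].Insts.insert(Fn[0].Insts.end() - 1, Insts[I]);
        Insts.erase(Insts.begin() + I);
        Changed = true;
      } else {
        ++I;
      }
    }
  }
  return Changed;
}

int main() {
  std::vector<Block> Fn = { { { "alloca", "br" } },
                            { { "alloca", "add", "ret" } } };
  hoistAllocas(Fn);
  std::printf("entry now has %zu insts\n", Fn[0].Insts.size()); // 3
}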
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0115e1f5d3..ce5d78afa3 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -47,7 +47,6 @@
#include <sstream>
using namespace llvm;
-
#include "NVPTXGenAsmWriter.inc"
bool RegAllocNilUsed = true;
@@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers",
cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
cl::init(true));
-namespace llvm {
-bool InterleaveSrcInPtx = false;
-}
-
-static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Emit source line in ptx file"),
- cl::location(llvm::InterleaveSrcInPtx));
+namespace llvm { bool InterleaveSrcInPtx = false; }
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V,
- DenseSet<GlobalVariable*> &Globals) {
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
@@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V,
/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
/// instances to be emitted, but only after any dependents have been added
/// first.
-void VisitGlobalVariableForEmission(GlobalVariable *GV,
- SmallVectorImpl<GlobalVariable*> &Order,
- DenseSet<GlobalVariable*> &Visited,
- DenseSet<GlobalVariable*> &Visiting) {
+void VisitGlobalVariableForEmission(
+ GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+ DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
// Have we already visited this one?
- if (Visited.count(GV)) return;
+ if (Visited.count(GV))
+ return;
// Do we have a circular dependency?
if (Visiting.count(GV))
@@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV,
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable*> Others;
+ DenseSet<GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
-
- for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
- E = Others.end(); I != E; ++I)
+
+ for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
+ I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
// Now we can visit ourself
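
VisitGlobalVariableForEmission is a depth-first walk that emits every dependency of a global before the global itself; Visited gives O(1) already-done checks and Visiting doubles as cycle detection. A standalone sketch of the same traversal over a toy dependency graph keyed by int:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

using Graph = std::map<int, std::vector<int>>;

void visit(int GV, const Graph &Deps, std::vector<int> &Order,
           std::set<int> &Visited, std::set<int> &Visiting) {
  if (Visited.count(GV))
    return;
  if (Visiting.count(GV)) { // walked back into our own chain: a cycle
    std::fprintf(stderr, "circular dependency at %d\n", GV);
    return;
  }
  Visiting.insert(GV);
  auto It = Deps.find(GV);
  if (It != Deps.end())
    for (int D : It->second) // all dependents first
      visit(D, Deps, Order, Visited, Visiting);
  Order.push_back(GV);       // now we can visit ourself
  Visited.insert(GV);
  Visiting.erase(GV);
}

int main() {
  Graph Deps = { { 1, { 2, 3 } }, { 2, { 3 } } };
  std::vector<int> Order;
  std::set<int> Visited, Visiting;
  for (int GV : { 1, 2, 3 })
    visit(GV, Deps, Order, Visited, Visiting);
  for (int GV : Order)
    std::printf("%d ", GV); // 3 2 1
  std::printf("\n");
}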
@@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (CE == 0)
llvm_unreachable("Unknown constant value to lower!");
-
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
return LowerConstant(C, AP);
// Otherwise report the problem to the user.
{
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- WriteAsOperand(OS, CE, /*PrintType=*/false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/ false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
@@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ // FALL THROUGH.
case Instruction::BitCast:
return LowerConstant(CE->getOperand(0), AP);
@@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false/*ZExt*/);
+ false /*ZExt*/);
return LowerConstant(Op, AP);
}
@@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ const MCExpr *MaskExpr =
+ MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
}
- // The MC library also has a right-shift operator, but it isn't consistently
+ // The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
case Instruction::Add:
case Instruction::Sub:
@@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ default:
+ llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add:
+ return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub:
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul:
+ return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv:
+ return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem:
+ return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl:
+ return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And:
+ return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or:
+ return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+ case Instruction::Xor:
+ return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
}
}
}
}
-
-void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
-{
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
if (ignoreLoc(MI))
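
The switch reflowed above maps each IR binary opcode inside a constant expression onto exactly one MC-level binary expression (MCBinaryExpr::CreateAdd and friends). A standalone model of that one-to-one mapping, folding concrete values instead of building expression nodes:

#include <cstdio>
#include <stdexcept>

enum Op { Add, Sub, Mul, SDiv, SRem, Shl, And, Or, Xor };

// One case per IR opcode, mirroring the LowerConstant switch.
long long fold(Op O, long long L, long long R) {
  switch (O) {
  case Add:  return L + R;
  case Sub:  return L - R;
  case Mul:  return L * R;
  case SDiv: return L / R;
  case SRem: return L % R;
  case Shl:  return L << R;
  case And:  return L & R;
  case Or:   return L | R;
  case Xor:  return L ^ R;
  }
  throw std::runtime_error("unknown binary operator");
}

int main() { std::printf("%lld\n", fold(Shl, 3, 4)); } // 48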
@@ -268,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (curLoc.isUnknown())
return;
-
const MachineFunction *MF = MI.getParent()->getParent();
//const TargetMachine &TM = MF->getTarget();
@@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (filenameMap.find(fileName.str()) == filenameMap.end())
return;
-
// Emit the line from the source file.
if (llvm::InterleaveSrcInPtx)
this->emitSrcInText(fileName.str(), curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()]
- << " " << curLoc.getLine() << " " << curLoc.getCol();
+ temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ << " " << curLoc.getCol();
OutStreamer.EmitRawText(Twine(temp.str().c_str()));
}
@@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(OS.str());
}
-void NVPTXAsmPrinter::printReturnValStr(const Function *F,
- raw_ostream &O)
-{
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
const TargetLowering *TLI = TM.getTargetLowering();
@@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
+ if (size < 32)
+ size = 32;
} else {
- assert(Ty->isFloatingPointTy() &&
- "Floating point type expected here");
+ assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " func_retval0";
- }
- else if (isa<PointerType>(Ty)) {
+ } else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
- << " func_retval0";
+ << " func_retval0";
} else {
- if ((Ty->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(Ty)) {
+ if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align "
- << retAlignment
- << " .b8 func_retval0["
- << totalsz << "]";
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
} else
- assert(false &&
- "Unknown return type");
+ assert(false && "Unknown return type");
}
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " func_retval" << idx;
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
- emitLinkageDirective(F,O);
+ emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else {
@@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
unsigned numRegClasses = TRI.getNumRegClasses();
- VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
OutStreamer.EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
@@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
OutStreamer.EmitRawText(StringRef("}\n"));
- delete []VRidGlobal2LocalMap;
+ delete[] VRidGlobal2LocalMap;
}
-
-void
-NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const {
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+ raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
- else specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
- else specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
- else specified = true;
+ if (llvm::getReqNTIDx(F, reqntidx) == false)
+ reqntidx = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false)
+ reqntidy = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false)
+ reqntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".reqntid " << reqntidx << ", "
- << reqntidy << ", " << reqntidz << "\n";
+ O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ << "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
- else specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
- else specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
- else specified = true;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ maxntidx = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ maxntidy = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ maxntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".maxntid " << maxntidx << ", "
- << maxntidy << ", " << maxntidz << "\n";
+ O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ << "\n";
unsigned mincta;
if (llvm::getMinCTASm(F, mincta))
O << ".minnctapersm " << mincta << "\n";
}
-void
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
- raw_ostream &O) {
- const TargetRegisterClass * RC = MRI->getRegClass(vr);
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
unsigned id = RC->getID();
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
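
emitKernelFunctionDirectives, reflowed in the hunk above, applies the same rule to reqntid and maxntid: if any axis is specified in the NVVM metadata, emit the directive with unspecified axes defaulted to 1; otherwise emit nothing. A standalone sketch, with a Dim3 struct standing in for the getReqNTID*/getMaxNTID* metadata queries:

#include <cstdio>

struct Dim3 { bool HasX, HasY, HasZ; unsigned X, Y, Z; };

void emitNTID(const char *Directive, const Dim3 &D) {
  unsigned X = D.HasX ? D.X : 1;
  unsigned Y = D.HasY ? D.Y : 1;
  unsigned Z = D.HasZ ? D.Z : 1;
  if (D.HasX || D.HasY || D.HasZ) // only emit when something was set
    std::printf(".%s %u, %u, %u\n", Directive, X, Y, Z);
}

int main() {
  Dim3 Req = { true, false, false, 128, 0, 0 };
  emitNTID("reqntid", Req);  // .reqntid 128, 1, 1
  Dim3 None = { false, false, false, 0, 0, 0 };
  emitNTID("maxntid", None); // nothing printed
}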
@@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
report_fatal_error("Bad register!");
}
-void
-NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
getVirtualRegisterName(vr, isVec, O);
}
-void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier,
- raw_ostream &O) {
- static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
- int Imm = (int)MO.getImm();
- if(0 == strcmp(Modifier, "vecelem"))
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+ const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+ static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+ int Imm = (int) MO.getImm();
+ if (0 == strcmp(Modifier, "vecelem"))
O << "_" << vecelem[Imm];
- else if(0 == strcmp(Modifier, "vecv4comm1")) {
- if((Imm < 0) || (Imm > 3))
+ else if (0 == strcmp(Modifier, "vecv4comm1")) {
+ if ((Imm < 0) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4comm2")) {
- if((Imm < 4) || (Imm > 7))
+ } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+ if ((Imm < 4) || (Imm > 7))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%4];
- }
- else if(0 == strcmp(Modifier, "vecv2comm1")) {
- if((Imm < 0) || (Imm > 1))
+ } else if (0 == strcmp(Modifier, "vecv4pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 4];
+ } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+ if ((Imm < 0) || (Imm > 1))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2comm2")) {
- if((Imm < 2) || (Imm > 3))
+ } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+ if ((Imm < 2) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%2];
- }
- else
+ } else if (0 == strcmp(Modifier, "vecv2pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 2];
+ } else
llvm_unreachable("Unknown Modifier on immediate operand");
}
@@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
emitVirtualRegister(MO.getReg(), true, O);
else
llvm_unreachable(
- "Don't know how to handle the modifier on virtual register.");
+ "Don't know how to handle the modifier on virtual register.");
}
}
return;
@@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
else if (strstr(Modifier, "vec") == Modifier)
printVecModifiedImmediate(MO, Modifier, O);
else
- llvm_unreachable("Don't know how to handle modifier on immediate operand");
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
return;
case MachineOperand::MO_FPImmediate:
@@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_ExternalSymbol: {
- const char * symbname = MO.getSymbolName();
+ const char *symbname = MO.getSymbolName();
if (strstr(symbname, ".PARAM") == symbname) {
unsigned index;
- sscanf(symbname+6, "%u[];", &index);
+ sscanf(symbname + 6, "%u[];", &index);
printParamName(index, O);
- }
- else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
unsigned index;
- sscanf(symbname+9, "%u[];", &index);
+ sscanf(symbname + 9, "%u[];", &index);
O << *CurrentFnSym << "_param_" << index << "_offset";
- }
- else
+ } else
O << symbname;
break;
}
@@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
}
-void NVPTXAsmPrinter::
-printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+ raw_ostream &O) const {
#ifndef __OPTIMIZE__
O << "\t// Implicit def :";
//printOperand(MI, 0);
@@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
if (Modifier && !strcmp(Modifier, "add")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
} else {
- if (MI->getOperand(opNum+1).isImm() &&
- MI->getOperand(opNum+1).getImm() == 0)
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
return; // don't print ',0' or '+0'
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
}
}
void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
+ raw_ostream &O, const char *Modifier) {
if (Modifier) {
const MachineOperand &MO = MI->getOperand(opNum);
- int Imm = (int)MO.getImm();
+ int Imm = (int) MO.getImm();
if (!strcmp(Modifier, "volatile")) {
if (Imm)
O << ".volatile";
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
- case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
- case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
- case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
- case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
- case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+ case NVPTX::PTXLdStInstCode::GLOBAL:
+ O << ".global";
+ break;
+ case NVPTX::PTXLdStInstCode::SHARED:
+ O << ".shared";
+ break;
+ case NVPTX::PTXLdStInstCode::LOCAL:
+ O << ".local";
+ break;
+ case NVPTX::PTXLdStInstCode::PARAM:
+ O << ".param";
+ break;
+ case NVPTX::PTXLdStInstCode::CONSTANT:
+ O << ".const";
+ break;
case NVPTX::PTXLdStInstCode::GENERIC:
if (!nvptxSubtarget.hasGenericLdSt())
O << ".global";
@@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
default:
llvm_unreachable("Wrong Address Space");
}
- }
- else if (!strcmp(Modifier, "sign")) {
- if (Imm==NVPTX::PTXLdStInstCode::Signed)
+ } else if (!strcmp(Modifier, "sign")) {
+ if (Imm == NVPTX::PTXLdStInstCode::Signed)
O << "s";
- else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+ else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
O << "u";
else
O << "f";
- }
- else if (!strcmp(Modifier, "vec")) {
- if (Imm==NVPTX::PTXLdStInstCode::V2)
+ } else if (!strcmp(Modifier, "vec")) {
+ if (Imm == NVPTX::PTXLdStInstCode::V2)
O << ".v2";
- else if (Imm==NVPTX::PTXLdStInstCode::V4)
+ else if (Imm == NVPTX::PTXLdStInstCode::V4)
O << ".v4";
- }
- else
+ } else
llvm_unreachable("Unknown Modifier");
- }
- else
+ } else
llvm_unreachable("Empty Modifier");
}
-void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
- emitLinkageDirective(F,O);
+ emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else
@@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
O << ";\n";
}
-static bool usedInGlobalVarDef(const Constant *C)
-{
+static bool usedInGlobalVarDef(const Constant *C) {
if (!C)
return false;
@@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C)
return true;
}
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
const Constant *C = dyn_cast<Constant>(*ui);
if (usedInGlobalVarDef(C))
return true;
@@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C)
return false;
}
-static bool usedInOneFunc(const User *U, Function const *&oneFunc)
-{
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
if (othergv->getName().str() == "llvm.used")
return true;
@@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc)
return false;
oneFunc = curFunc;
return true;
- }
- else
+ } else
return false;
}
if (const MDNode *md = dyn_cast<MDNode>(U))
if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
+ (md->getName().str() == "llvm.dbg.sp")))
return true;
-
- for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
- ui!=ue; ++ui) {
+ for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+ ui != ue; ++ui) {
if (usedInOneFunc(*ui, oneFunc) == false)
return false;
}
@@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
static bool useFuncSeen(const Constant *C,
llvm::DenseMap<const Function *, bool> &seenMap) {
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
if (const Constant *cu = dyn_cast<Constant>(*ui)) {
if (useFuncSeen(cu, seenMap))
return true;
} else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
const BasicBlock *bb = I->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
if (seenMap.find(caller) != seenMap.end())
return true;
}
@@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
- for (Module::const_iterator FI=M.begin(), FE=M.end();
- FI!=FE; ++FI) {
+ for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
if (F->isDeclaration()) {
@@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
emitDeclaration(F, O);
continue;
}
- for (Value::const_use_iterator iter=F->use_begin(),
- iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+ for (Value::const_use_iterator iter = F->use_begin(),
+ iterEnd = F->use_end();
+ iter != iterEnd; ++iter) {
if (const Constant *C = dyn_cast<Constant>(*iter)) {
if (usedInGlobalVarDef(C)) {
// The use is in the initialization of a global variable
@@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
}
}
- if (!isa<Instruction>(*iter)) continue;
+ if (!isa<Instruction>(*iter))
+ continue;
const Instruction *instr = cast<Instruction>(*iter);
const BasicBlock *bb = instr->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
// If a caller has already been seen, then the caller is
// appearing in the module before the callee, so print out
@@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DebugInfoFinder DbgFinder;
DbgFinder.processModule(M);
- unsigned i=1;
+ unsigned i = 1;
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ E = DbgFinder.compile_unit_end();
+ I != E; ++I) {
DICompileUnit DIUnit(*I);
StringRef Filename(DIUnit.getFilename());
StringRef Dirname(DIUnit.getDirectory());
@@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
+ E = DbgFinder.subprogram_end();
+ I != E; ++I) {
DISubprogram SP(*I);
StringRef Filename(SP.getFilename());
StringRef Dirname(SP.getDirectory());
@@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
}
-bool NVPTXAsmPrinter::doInitialization (Module &M) {
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
//bool Result = AsmPrinter::doInitialization(M);
// Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
- .Initialize(OutContext, TM);
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .Initialize(OutContext, TM);
Mang = new Mangler(OutContext, *TM.getDataLayout());
@@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
emitHeader(M, OS1);
OutStreamer.EmitRawText(OS1.str());
-
// Already commented out
//bool Result = AsmPrinter::doInitialization(M);
-
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
@@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable*,8> Globals;
- DenseSet<GlobalVariable*> GVVisited;
- DenseSet<GlobalVariable*> GVVisiting;
+ SmallVector<GlobalVariable *, 8> Globals;
+ DenseSet<GlobalVariable *> GVVisited;
+ DenseSet<GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
- assert(GVVisited.size() == M.getGlobalList().size() &&
+ assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
@@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
+ return false; // success
}
-void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
@@ -989,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
Module::GlobalListType &global_list = M.getGlobalList();
int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable* [n];
+ GlobalVariable **gv_array = new GlobalVariable *[n];
// first, back-up GlobalVariable in gv_array
i = 0;
for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
+ I != E; ++I)
gv_array[i++] = &*I;
// second, empty global_list
@@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
bool ret = AsmPrinter::doFinalization(M);
// now we restore global variables
- for (i = 0; i < n; i ++)
+ for (i = 0; i < n; i++)
global_list.insert(global_list.end(), gv_array[i]);
delete[] gv_array;
return ret;
-
//bool Result = AsmPrinter::doFinalization(M);
// Instead of calling the parents doFinalization, we may
// clone parents doFinalization and customize here.
@@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
// external without init -> .extern
// appending -> not allowed, assert.
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
-{
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+ raw_ostream &O) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
@@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
}
}
-
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
bool processDemoted) {
// Skip metadata
@@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
- unsigned sample=CI->getZExtValue();
+ unsigned sample = CI->getZExtValue();
O << " = { ";
- for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
- __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+ for (int i = 0,
+ addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+ i < 3; i++) {
O << "addr_mode_" << i << " = ";
switch (addr) {
- case 0: O << "wrap"; break;
- case 1: O << "clamp_to_border"; break;
- case 2: O << "clamp_to_edge"; break;
- case 3: O << "wrap"; break;
- case 4: O << "mirror"; break;
+ case 0:
+ O << "wrap";
+ break;
+ case 1:
+ O << "clamp_to_border";
+ break;
+ case 2:
+ O << "clamp_to_edge";
+ break;
+ case 3:
+ O << "wrap";
+ break;
+ case 4:
+ O << "mirror";
+ break;
}
- O <<", ";
+ O << ", ";
}
O << "filter_mode = ";
- switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
- case 0: O << "nearest"; break;
- case 1: O << "linear"; break;
- case 2: assert ( 0 && "Anisotropic filtering is not supported");
- default: O << "nearest"; break;
+ switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+ case 0:
+ O << "nearest";
+ break;
+ case 1:
+ O << "linear";
+ break;
+ case 2:
+ assert(0 && "Anisotropic filtering is not supported");
+ default:
+ O << "nearest";
+ break;
}
- if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+ if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
O << ", force_unnormalized_coords = 1";
}
O << " }";
@@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
else
O << " .align " << GVar->getAlignment();
-
if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
O << " .";
O << getPTXFundamentalTypeStr(ETy, false);
@@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
// PTX allows variable initialization only for constant and global state
// spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
- O << " = " ;
+ O << " = ";
printScalarConstant(Initializer, O);
}
}
} else {
- unsigned int ElementSize =0;
+ unsigned int ElementSize = 0;
// Although PTX has direct support for struct type and array type and
// LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
// PTX allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
- if (!isa<UndefValue>(Initializer) &&
- !Initializer->isNullValue()) {
+ if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (nvptxSubtarget.is64Bit()) {
- O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/8;
- }
- else {
- O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/4;
+ O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 8;
+ } else {
+ O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 4;
}
O << "]";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
O << ElementSize;
O << "]";
}
- O << " = {" ;
+ O << " = {";
aggBuffer.print();
O << "}";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
- }
- else {
+ } else {
O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
}
@@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<GlobalVariable *> &gvars = localDecls[f];
- for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
printModuleLevelGV(gvars[i], O, true);
}
@@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
raw_ostream &O) const {
switch (AddressSpace) {
case llvm::ADDRESS_SPACE_LOCAL:
- O << "local" ;
+ O << "local";
break;
case llvm::ADDRESS_SPACE_GLOBAL:
- O << "global" ;
+ O << "global";
break;
case llvm::ADDRESS_SPACE_CONST:
// This logic should be consistent with that in
// getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
if (nvptxSubtarget.hasGenericLdSt())
- O << "global" ;
+ O << "global";
else
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_SHARED:
- O << "shared" ;
+ O << "shared";
break;
default:
report_fatal_error("Bad address space found while emitting PTX");
@@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
}
}
-std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
- bool useB4PTR) const {
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
return "f64";
case Type::PointerTyID:
if (nvptxSubtarget.is64Bit())
- if (useB4PTR) return "b64";
- else return "u64";
+ if (useB4PTR)
+ return "b64";
+ else
+ return "u64";
+ else if (useB4PTR)
+ return "b32";
else
- if (useB4PTR) return "b32";
- else return "u32";
+ return "u32";
}
llvm_unreachable("unexpected type");
return NULL;
}
-void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
@@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
return;
}
- int64_t ElementSize =0;
+ int64_t ElementSize = 0;
// Although PTX has direct support for struct type and array type and LLVM IR
// is very similar to PTX, the LLVM CodeGen does not support for targets that
@@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
if (ElementSize) {
- O << itostr(ElementSize) ;
+ O << itostr(ElementSize);
}
O << "]";
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
- return ;
+ return;
}
-
-static unsigned int
-getOpenCLAlignment(const DataLayout *TD,
- Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
return TD->getPrefTypeAlignment(Ty);
@@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int numE = VTy->getNumElements();
unsigned int alignE = TD->getPrefTypeAlignment(ETy);
if (numE == 3)
- return 4*alignE;
+ return 4 * alignE;
else
- return numE*alignE;
+ return numE * alignE;
}
const StructType *STy = dyn_cast<StructType>(Ty);
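
getOpenCLAlignment's vector case encodes the OpenCL rule that a 3-element vector occupies a 4-element slot, so it aligns like a vec4. A standalone sketch of just that rule:

#include <cstdio>

unsigned vectorAlignment(unsigned NumElts, unsigned EltAlign) {
  if (NumElts == 3)
    return 4 * EltAlign; // vec3 is padded to a vec4 slot in OpenCL
  return NumElts * EltAlign;
}

int main() {
  std::printf("%u %u\n", vectorAlignment(3, 4), vectorAlignment(2, 4)); // 16 8
}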
@@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
- for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
unsigned int align = getOpenCLAlignment(TD, ETy);
if (align > alignStruct)
@@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i==paramIndex) {
+ if (i == paramIndex) {
printParamName(I, paramIndex, O);
return;
}
@@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
llvm_unreachable("paramIndex out of bound");
}
-void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = TM.getTargetLowering();
@@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "(\n";
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!first)
O << ",\n";
@@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
else // Default image is read_only
O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
- }
- else // Should be llvm::isSampler(*I)
+ } else // Should be llvm::isSampler(*I)
O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
- << paramIndex;
+ << paramIndex;
continue;
}
- if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) {
+ if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (Ty->isVectorTy()) {
+ // Just print .param .b8 .align <a> .param[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
+ if (align == 0)
+ align = TD->getABITypeAlignment(Ty);
+
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ O << "\t.param .align " << align << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+
+ continue;
+ }
// Just a scalar
const PointerType *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
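
The new branch above is one of the functional changes in this hunk: vector parameters without the byval attribute are now emitted as an aligned byte array, ".param .align <a> .b8 name[size]", falling back to the ABI alignment when no parameter alignment attribute is present. A standalone sketch of that emission:

#include <cstdio>

// Mirrors the vector-parameter branch: attribute alignment wins when
// present (nonzero), otherwise the ABI alignment of the type is used.
void emitVectorParam(const char *Name, unsigned AttrAlign,
                     unsigned ABIAlign, unsigned AllocSize) {
  unsigned Align = AttrAlign ? AttrAlign : ABIAlign;
  std::printf("\t.param .align %u .b8 %s[%u]\n", Align, Name, AllocSize);
}

int main() {
  emitVectorParam("foo_param_0", 0, 16, 16); // .param .align 16 .b8 foo_param_0[16]
}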
@@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
- switch(addrSpace) {
+ switch (addrSpace) {
default:
O << ".ptr ";
break;
@@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << ".ptr .global ";
break;
}
- O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
}
// non-pointer scalar to kernel func
- O << "\t.param ."
- << getPTXFundamentalTypeStr(Ty) << " ";
+ O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
printParamName(I, paramIndex, O);
continue;
}
@@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// param has byVal attribute. So should be a pointer
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI || isKernelFunc) {
// Just print .param .b8 .align <a> .param[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- unsigned align = PAL.getParamAlignment(paramIndex+1);
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
align = TD->getABITypeAlignment(ETy);
unsigned sz = TD->getTypeAllocSize(ETy);
- O << "\t.param .align " << align
- << " .b8 ";
+ O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;
@@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// each vector element.
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << "\t.reg .b" << sz << " ";
printParamName(I, paramIndex, O);
- if (j<je-1) O << ",\n";
+ if (j < je - 1)
+ O << ",\n";
++paramIndex;
}
- if (i<e-1)
+ if (i < e - 1)
O << ",\n";
}
--paramIndex;
@@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
emitFunctionParamList(F, O);
}
-
-void NVPTXAsmPrinter::
-setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+ const MachineFunction &MF) {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
int NumBytes = (int) MFI->getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
- << DEPOTNAME
- << getFunctionNumber() << "[" << NumBytes << "];\n";
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
if (nvptxSubtarget.is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
- }
- else {
+ } else {
O << "\t.reg .b32 \t%SP;\n";
O << "\t.reg .b32 \t%SPL;\n";
}
@@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
// register number and the per class virtual register number.
// We use the per class virtual register number in the ptx output.
unsigned int numVRs = MRI->getNumVirtRegs();
- for (unsigned i=0; i< numVRs; i++) {
+ for (unsigned i = 0; i < numVRs; i++) {
unsigned int vr = TRI->index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
int n = regmap.size();
- regmap.insert(std::make_pair(vr, n+1));
+ regmap.insert(std::make_pair(vr, n + 1));
}
// Emit register declarations
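The loop above gives every virtual register a per-class number: each register class keeps its own map, and a register's local id is simply that map's size before insertion, plus one. The same numbering scheme, self-contained (plain unsigned ids stand in for LLVM's virtual registers and register-class IDs):

#include <cstdio>
#include <map>

int main() {
  // class ID -> (global vreg -> 1-based per-class number)
  std::map<unsigned, std::map<unsigned, unsigned>> VRidGlobal2LocalMap;

  struct { unsigned VReg, ClassID; } Regs[] = {
    { 100, 0 }, { 101, 1 }, { 102, 0 }, { 103, 1 }, { 104, 0 },
  };
  for (auto R : Regs) {
    std::map<unsigned, unsigned> &RegMap = VRidGlobal2LocalMap[R.ClassID];
    unsigned N = RegMap.size();       // registers already seen in this class
    RegMap.insert({ R.VReg, N + 1 }); // local numbers start at 1
  }
  // vreg 104 is the third register of class 0, so its local number is 3.
  std::printf("%u\n", VRidGlobal2LocalMap[0][104]);
  return 0;
}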
@@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
OutStreamer.EmitRawText(O.str());
}
-
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
- APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
bool ignored;
unsigned int numHex;
const char *lead;
- if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+ if (Fp->getType()->getTypeID() == Type::FloatTyID) {
numHex = 8;
lead = "0f";
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
} else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
numHex = 16;
lead = "0d";
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
} else
llvm_unreachable("unsupported fp type");
@@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-
void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
@@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
- if (s<Bytes)
+ if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
return;
@@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
- if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+ if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c =
(unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
- } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
- short int16 =
- (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
- ptr = (unsigned char*)&int16;
+ } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+ short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
- } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt =
- dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
- Cexpr, TD))) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
}
@@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
llvm_unreachable("unsupported integer const type");
- } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
}
@@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type* Ty = CFP->getType();
+ const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
- float float32 = (float)CFP->getValueAPF().convertToFloat();
- ptr = (unsigned char*)&float32;
+ float float32 = (float) CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char *)&float32;
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (Ty == Type::getDoubleTy(CPV->getContext())) {
double float64 = CFP->getValueAPF().convertToDouble();
- ptr = (unsigned char*)&float64;
+ ptr = (unsigned char *)&float64;
aggBuffer->addBytes(ptr, 8, Bytes);
- }
- else {
+ } else {
llvm_unreachable("unsupported fp const type");
}
break;
@@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::PointerTyID: {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- }
- else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
@@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
isa<ConstantStruct>(CPV)) {
int ElementSize = TD->getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
- if ( Bytes > ElementSize )
- aggBuffer->addZeros(Bytes-ElementSize);
- }
- else if (isa<ConstantAggregateZero>(CPV))
+ if (Bytes > ElementSize)
+ aggBuffer->addZeros(Bytes - ElementSize);
+ } else if (isa<ConstantAggregateZero>(CPV))
aggBuffer->addZeros(Bytes);
else
llvm_unreachable("Unexpected Constant type");
@@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
}
if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(CPV)) {
+ dyn_cast<ConstantDataSequential>(CPV)) {
if (CDS->getNumElements())
for (unsigned i = 0; i < CDS->getNumElements(); ++i)
bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
@@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
return;
}
-
if (isa<ConstantStruct>(CPV)) {
if (CPV->getNumOperands()) {
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
- if ( i == (e - 1))
+ if (i == (e - 1))
Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST)
- - TD->getStructLayout(ST)->getElementOffset(i);
+ TD->getTypeAllocSize(ST) -
+ TD->getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
- TD->getStructLayout(ST)->getElementOffset(i);
- bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
- aggBuffer);
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
return;
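The struct case sizes each field as the distance to the next field's offset, and the last field as the distance to the end of the struct's allocated size, so trailing padding is zero-filled too. A worked example with concrete offsets (the layout numbers are illustrative, standing in for DataLayout's StructLayout):

#include <cstdio>

int main() {
  // Layout for struct { int8_t a; int32_t b; }: a at 0, b at 4, size 8.
  unsigned Offsets[] = { 0, 4 };
  unsigned AllocSize = 8, NumElems = 2;

  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Bytes;
    if (i == NumElems - 1)
      // Last field runs to the end of the struct, covering tail padding.
      Bytes = Offsets[0] + AllocSize - Offsets[i];
    else
      Bytes = Offsets[i + 1] - Offsets[i];
    std::printf("field %u buffered as %u byte(s)\n", i, Bytes); // 4 and 4
  }
  return 0;
}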
@@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-
bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
- if (PI != TypeNameMap.end() &&
- (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
+ if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
return true;
return false;
@@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
@@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
+ return true; // Unknown modifier
O << '[';
printMemOperand(MI, OpNo, O);
@@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
-{
- switch(MI.getOpcode()) {
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
- case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0:
- case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32:
- case NVPTX::CallArgF64: case NVPTX::CallArgI16:
- case NVPTX::CallArgI32: case NVPTX::CallArgI32imm:
- case NVPTX::CallArgI64: case NVPTX::CallArgI8:
- case NVPTX::CallArgParam: case NVPTX::CallVoidInst:
- case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End:
+ case NVPTX::CallArgBeginInst:
+ case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1:
+ case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64:
+ case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32:
+ case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64:
+ case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam:
+ case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg:
+ case NVPTX::Callseq_End:
case NVPTX::CallVoidInstReg64:
- case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst:
- case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
- case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
- case NVPTX::StoreParamF32: case NVPTX::StoreParamF64:
- case NVPTX::StoreParamI16: case NVPTX::StoreParamI32:
- case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
- case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
- case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
- case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
- case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
- case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
- case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
- case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam:
- case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64:
- case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32:
- case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8:
- case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
- case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
- case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
+ case NVPTX::DeclareParamInst:
+ case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst:
+ case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst:
+ case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32:
+ case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16:
+ case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64:
+ case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8:
+ case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16:
+ case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreRetvalF32:
+ case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16:
+ case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64:
+ case NVPTX::StoreRetvalI8:
+ case NVPTX::LastCallArgF32:
+ case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16:
+ case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm:
+ case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8:
+ case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32:
+ case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16:
+ case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64:
+ case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32:
+ case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16:
+ case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64:
+ case NVPTX::LoadParamRegI8:
+ case NVPTX::PrototypeInst:
+ case NVPTX::DBG_VALUE:
return true;
}
return false;
@@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader * reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename.str());
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
this->OutStreamer.EmitRawText(Twine(temp.str()));
}
-
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
- if (reader == NULL) {
- reader = new LineReader(filename);
+ if (reader == NULL) {
+ reader = new LineReader(filename);
}
if (reader->fileName() != filename) {
delete reader;
- reader = new LineReader(filename);
+ reader = new LineReader(filename);
}
return reader;
}
-
-std::string
-LineReader::readLine(unsigned lineNum) {
+std::string LineReader::readLine(unsigned lineNum) {
if (lineNum < theCurLine) {
theCurLine = 0;
- fstr.seekg(0,std::ios::beg);
+ fstr.seekg(0, std::ios::beg);
}
while (theCurLine < lineNum) {
- fstr.getline(buff,500);
+ fstr.getline(buff, 500);
theCurLine++;
}
return buff;
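readLine keeps a forward-only cursor, so a request for an earlier line first rewinds the stream and then scans ahead line by line. The same logic over a std::istream, self-contained (the clear() before seekg is a small defensive addition not present above):

#include <iostream>
#include <sstream>
#include <string>

// Mirrors LineReader::readLine: rewind on backward requests, then scan
// forward to the requested 1-based line, caching the last line read.
struct MiniLineReader {
  explicit MiniLineReader(std::istream &S) : In(S) {}
  std::istream &In;
  unsigned CurLine = 0;
  std::string Buff;

  std::string readLine(unsigned LineNum) {
    if (LineNum < CurLine) {
      CurLine = 0;
      In.clear(); // defensive: reset EOF state before seeking
      In.seekg(0, std::ios::beg);
    }
    while (CurLine < LineNum) {
      std::getline(In, Buff);
      ++CurLine;
    }
    return Buff;
  }
};

int main() {
  std::istringstream Src("alpha\nbeta\ngamma\n");
  MiniLineReader R(Src);
  std::cout << R.readLine(3) << "\n"; // gamma
  std::cout << R.readLine(1) << "\n"; // alpha, after a rewind
  return 0;
}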
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 42498f0bf7..6dc9fc0ffe 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -43,15 +43,15 @@
// This is defined in AsmPrinter.cpp.
// Used to process the constant expressions in initializers.
namespace nvptx {
-const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
- llvm::AsmPrinter &AP) ;
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
}
namespace llvm {
class LineReader {
private:
- unsigned theCurLine ;
+ unsigned theCurLine;
std::ifstream fstr;
char buff[512];
std::string theFileName;
@@ -63,17 +63,12 @@ public:
theFileName = filename;
}
std::string fileName() { return theFileName; }
- ~LineReader() {
- fstr.close();
- }
+ ~LineReader() { fstr.close(); }
std::string readLine(unsigned line);
};
-
-
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
-
class AggBuffer {
// Used to buffer the emitted string for initializing global
// aggregates.
@@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
+ unsigned size; // size of the buffer in bytes
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
@@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- :O(_O),AP(_AP) {
+ : O(_O), AP(_AP) {
buffer = new unsigned char[_size];
size = _size;
curpos = 0;
numSymbols = 0;
}
- ~AggBuffer() {
- delete [] buffer;
- }
+ ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
- assert((curpos+Num) <= size);
- assert((curpos+Bytes) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ assert((curpos + Bytes) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = Ptr[i];
- curpos ++;
+ curpos++;
}
- for ( int i=Num; i < Bytes ; ++i) {
+ for (int i = Num; i < Bytes; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
unsigned addZeros(int Num) {
- assert((curpos+Num) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
@@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
void print() {
if (numSymbols == 0) {
// print out in bytes
- for (unsigned i=0; i<size; i++) {
+ for (unsigned i = 0; i < size; i++) {
if (i)
O << ", ";
- O << (unsigned int)buffer[i];
+ O << (unsigned int) buffer[i];
}
} else {
// print out in 4-bytes or 8-bytes
@@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nBytes = 4;
if (AP.nvptxSubtarget.is64Bit())
nBytes = 8;
- for (pos=0; pos<size; pos+=nBytes) {
+ for (pos = 0; pos < size; pos += nBytes) {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
@@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- }
- else if (ConstantExpr *Cexpr =
- dyn_cast<ConstantExpr>(v)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
nSym++;
if (nSym >= numSymbols)
- nextSymbolPos = size+1;
+ nextSymbolPos = size + 1;
else
nextSymbolPos = symbolPosInBuffer[nSym];
- } else
- if (nBytes == 4)
- O << *(unsigned int*)(buffer+pos);
- else
- O << *(unsigned long long*)(buffer+pos);
+ } else if (nBytes == 4)
+ O << *(unsigned int *)(buffer + pos);
+ else
+ O << *(unsigned long long *)(buffer + pos);
}
}
}
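print() walks the buffer in pointer-sized steps and substitutes a symbol whenever the position matches the next recorded symbol offset; everything else is emitted as raw data words. The position bookkeeping in isolation (a stand-in string replaces the MCSymbol/LowerConstant output):

#include <cstdio>
#include <vector>

int main() {
  unsigned Size = 16, NBytes = 8;          // 8-byte steps on a 64-bit target
  std::vector<unsigned> SymbolPos = { 8 }; // one symbol at buffer offset 8
  unsigned NSym = 0;
  unsigned NextSymbolPos = SymbolPos.empty() ? Size + 1 : SymbolPos[0];

  for (unsigned Pos = 0; Pos < Size; Pos += NBytes) {
    if (Pos)
      std::printf(", ");
    if (Pos == NextSymbolPos) {
      std::printf("some_global"); // stand-in for the lowered symbol name
      ++NSym;
      NextSymbolPos = NSym >= SymbolPos.size() ? Size + 1 : SymbolPos[NSym];
    } else {
      std::printf("<word@%u>", Pos); // otherwise a raw 8-byte data word
    }
  }
  std::printf("\n"); // prints: <word@0>, some_global
  return 0;
}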
@@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
virtual void emitSrcInText(StringRef filename, unsigned line);
-private :
- virtual const char *getPassName() const {
- return "NVPTX Assembly Printer";
- }
+private:
+ virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
@@ -207,31 +195,28 @@ private :
void printGlobalVariable(const GlobalVariable *GVar);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
- void printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier, raw_ostream &O);
+ const char *Modifier = 0);
+ void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+ raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
- bool=false);
+ void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitHeader(Module &M, raw_ostream &O);
- void emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const;
+ void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF,
- unsigned &FrameSize);
+ void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
bool isImageType(const Type *Ty);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -269,17 +254,16 @@ private:
void recordAndEmitFilenames(Module &);
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
- void emitPTXAddressSpace(unsigned int AddressSpace,
- raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
- void printScalarConstant(Constant *CPV, raw_ostream &O) ;
- void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+ void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+ std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+ void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
- void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+ void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
@@ -289,10 +273,9 @@ private:
LineReader *reader;
LineReader *getReader(std::string);
public:
- NVPTXAsmPrinter(TargetMachine &TM,
- MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
VRidGlobal2LocalMap = NULL;
reader = NULL;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index bb2c55ceed..6533da5102 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -25,9 +25,7 @@
using namespace llvm;
-bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
- return true;
-}
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFrameInfo()->hasStackObjects()) {
@@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
if (is64bit) {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
} else {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
}
- }
- else {
+ } else {
// mov %SP, %depot;
if (is64bit)
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
else
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
}
}
}
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
-}
+ MachineBasicBlock &MBB) const {}
// This function eliminates ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
-void NVPTXFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
// Simply discard ADJCALLSTACKDOWN,
// ADJCALLSTACKUP instructions.
MBB.erase(I);
}
-
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index d34e7bec1d..819f1dd3f4 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,6 @@
#include "llvm/Target/TargetFrameLowering.h"
-
namespace llvm {
class NVPTXTargetMachine;
@@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering {
public:
explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- tm(_tm), is64bit(_is64bit) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+ is64bit(_is64bit) {}
virtual bool hasFP(const MachineFunction &MF) const;
virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 481f13afd1..e862988c85 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "NVPTXISelDAGToDAG.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -26,27 +25,22 @@
using namespace llvm;
-
-static cl::opt<bool>
-UseFMADInstruction("nvptx-mad-enable",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
- cl::init(false));
+static cl::opt<bool> UseFMADInstruction(
+ "nvptx-mad-enable", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
static cl::opt<int>
-FMAContractLevel("nvptx-fma-level",
- cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
-
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
-static cl::opt<int>
-UsePrecDivF32("nvptx-prec-divf32",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
- cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if avaiable."),
+ cl::init(2));
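Because these are cl::opt declarations, each flag above (nvptx-mad-enable, nvptx-fma-level, nvptx-prec-divf32) becomes a regular llc command-line option. A minimal cl::opt example in the same pattern (the flag name demo-fma-level is made up; linking against LLVM's Support library is assumed):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<int> DemoFMALevel("demo-fma-level", cl::ZeroOrMore,
                                 cl::desc("0: off, 1: on, 2: aggressive"),
                                 cl::init(2));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  // cl::opt<int> converts to int; run as: ./demo -demo-fma-level=1
  return DemoFMALevel >= 1 ? 0 : 1;
}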
/// createNVPTXISelDag - This pass converts a legalized DAG into an
/// NVPTX-specific DAG, ready for instruction scheduling.
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
return new NVPTXDAGToDAGISel(TM, OptLevel);
}
-
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
-: SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>())
-{
+ : SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// Always do fma.f32 fpcontract if the target supports the instruction.
// Always do fma.f64 fpcontract if the target supports the instruction.
  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
// support fma.f32.
doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
- doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel>=1);
- doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel>=1);
- doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel==2);
- doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel==2);
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+ doFMAF32AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+ doFMAF64AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
@@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
-SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
- return NULL; // Already selected.
+ return NULL; // Already selected.
SDNode *ResNode = NULL;
switch (N->getOpcode()) {
@@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreV4:
ResNode = SelectStoreVector(N);
break;
- default: break;
+ default:
+ break;
}
if (ResNode)
return ResNode;
return SelectCode(N);
}
-
-static unsigned int
-getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
-{
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+ const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getSrcValue();
if (!Src)
return NVPTX::PTXLdStInstCode::LOCAL;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
- case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
- case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
- case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_LOCAL:
+ return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED:
+ return NVPTX::PTXLdStInstCode::SHARED;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
return NVPTX::PTXLdStInstCode::CONSTANT;
- case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
- case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_GENERIC:
+ return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM:
+ return NVPTX::PTXLdStInstCode::PARAM;
case llvm::ADDRESS_SPACE_CONST:
// If the arch supports generic address space, translate it to GLOBAL
// for correctness.
@@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
return NVPTX::PTXLdStInstCode::GLOBAL;
else
return NVPTX::PTXLdStInstCode::CONSTANT;
- default: break;
+ default:
+ break;
}
}
return NVPTX::PTXLdStInstCode::LOCAL;
}
-
-SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD= NULL;
+ SDNode *NVPTXLD = NULL;
// do not support pre/post inc/dec
if (LD->isIndexed())
@@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N1.getNode(), N1, Base, Offset):
- SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N1.getNode(), N1, Base, Offset):
- SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari_64;
+ break;
+ default:
+ return NULL;
}
} else {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari;
+ break;
+ default:
+ return NULL;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- }
- else {
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else {
if (Subtarget.is64Bit()) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg_64;
+ break;
+ default:
+ return NULL;
}
} else {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg;
+ break;
+ default:
+ return NULL;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
}
if (NVPTXLD != NULL) {
@@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
MemSDNode *MemSD = cast<MemSDNode>(N);
EVT LoadedVT = MemSD->getMemoryVT();
-
if (!LoadedVT.isSimple())
- return NULL;
+ return NULL;
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
@@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
unsigned int FromType;
// The last operand holds the original LoadSDNode::getExtensionType() value
- unsigned ExtensionType =
- cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+ unsigned ExtensionType = cast<ConstantSDNode>(
+ N->getOperand(N->getNumOperands() - 1))->getZExtValue();
if (ExtensionType == ISD::SEXTLOAD)
FromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
@@ -384,197 +438,328 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
unsigned VecType;
switch (N->getOpcode()) {
- case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
- case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
- default: return NULL;
+ case NVPTXISD::LoadV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ break;
+ case NVPTXISD::LoadV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ break;
+ default:
+ return NULL;
}
EVT EltVT = N->getValueType(0);
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_avar;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_avar;
+ break;
}
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile),
- getI32Imm(CodeAddrSpace),
- getI32Imm(VecType),
- getI32Imm(FromType),
- getI32Imm(FromTypeWidth),
- Addr, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
- SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_asi;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_asi;
+ break;
}
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile),
- getI32Imm(CodeAddrSpace),
- getI32Imm(VecType),
- getI32Imm(FromType),
- getI32Imm(FromTypeWidth),
- Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
- SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari_64;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari_64;
+ break;
}
break;
}
} else {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari;
+ break;
}
break;
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile),
- getI32Imm(CodeAddrSpace),
- getI32Imm(VecType),
- getI32Imm(FromType),
- getI32Imm(FromTypeWidth),
- Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg_64;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg_64;
+ break;
}
break;
}
} else {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
- case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
- case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg;
+ break;
}
break;
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
- case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
- case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
- case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg;
+ break;
}
break;
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile),
- getI32Imm(CodeAddrSpace),
- getI32Imm(VecType),
- getI32Imm(FromType),
- getI32Imm(FromTypeWidth),
- Op1, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
}
@@ -598,89 +783,179 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
// Select opcode
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
- case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
- case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+ break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+ break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
- case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
- case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+ break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ break;
}
break;
}
} else {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::LDGV2:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
- case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
- case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+ break;
}
break;
case NVPTXISD::LDGV4:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+ break;
}
break;
case NVPTXISD::LDUV2:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
- case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
- case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ break;
}
break;
case NVPTXISD::LDUV4:
switch (RetVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
- case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
- case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
- case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ break;
}
break;
}
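// For illustration only -- a hedged sketch of what the _ELE opcodes selected
// above should print as; the exact PTX comes from the .td files, which are
// not part of this patch. For NVPTXISD::LDGV2 with RetVT f32 on a 64-bit
// subtarget (INT_PTX_LDG_G_v2f32_ELE_64), roughly:
//   ld.global.nc.v2.f32  {%f1, %f2}, [%rd1];
// and for the corresponding LDU form (INT_PTX_LDU_G_v2f32_ELE_64):
//   ldu.global.v2.f32    {%f1, %f2}, [%rd1];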
@@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
return LD;
}
-
-SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
@@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
// - for integer type, always use 'u'
//
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
unsigned int toType;
if (ScalarVT.isFloatingPoint())
toType = NVPTX::PTXLdStInstCode::Float;
@@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Addr, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N2.getNode(), N2, Base, Offset):
- SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N2.getNode(), N2, Base, Offset):
- SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari_64;
+ break;
+ default:
+ return NULL;
}
} else {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari;
+ break;
+ default:
+ return NULL;
}
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
} else {
if (Subtarget.is64Bit()) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg_64;
+ break;
+ default:
+ return NULL;
}
} else {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg;
+ break;
+ default:
+ return NULL;
}
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- N2, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
}
if (NVPTXST != NULL) {
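// A hedged recap of the addressing modes dispatched above (operand shapes
// inferred from the opcode suffixes, not from the .td definitions):
//   _avar : [sym]       -- direct global/extern symbol address
//   _asi  : [sym+imm]   -- symbol plus constant offset
//   _ari  : [reg+imm]   -- register plus constant offset
//   _areg : [reg]       -- plain register indirect
// e.g. ST_f32_ari would print roughly as:  st.f32 [%rd1+4], %f1;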
@@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
// - for integer type, always use 'u'
assert(StoreVT.isSimple() && "Store value is not simple");
MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
- unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
unsigned ToType;
if (ScalarVT.isFloatingPoint())
ToType = NVPTX::PTXLdStInstCode::Float;
else
ToType = NVPTX::PTXLdStInstCode::Unsigned;
-
SmallVector<SDValue, 12> StOps;
SDValue N2;
unsigned VecType;
@@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
StOps.push_back(N->getOperand(4));
N2 = N->getOperand(5);
break;
- default: return NULL;
+ default:
+ return NULL;
}
StOps.push_back(getI32Imm(IsVolatile));
@@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_avar;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_avar;
+ break;
}
break;
}
StOps.push_back(Addr);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N2.getNode(), N2, Base, Offset):
- SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_asi;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_asi;
+ break;
}
break;
}
StOps.push_back(Base);
StOps.push_back(Offset);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N2.getNode(), N2, Base, Offset):
- SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari_64;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari_64;
+ break;
}
break;
}
} else {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari;
+ break;
}
break;
}
@@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg_64;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg_64;
+ break;
}
break;
}
} else {
switch (N->getOpcode()) {
- default: return NULL;
+ default:
+ return NULL;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
- case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
- case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg;
+ break;
}
break;
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
- default: return NULL;
- case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
- case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
- case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
- case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg;
+ break;
}
break;
}
@@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// Return true if TGA or ES.
- if (N.getOpcode() == ISD::TargetGlobalAddress
- || N.getOpcode() == ISD::TargetExternalSymbol) {
+ if (N.getOpcode() == ISD::TargetGlobalAddress ||
+ N.getOpcode() == ISD::TargetExternalSymbol) {
Address = N;
return true;
}
@@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
}
// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- SDValue base=Addr.getOperand(0);
+ SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
return true;
@@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
}
// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Offset = CurDAG->getTargetConstant(0, mvt);
@@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // direct calls.
+ return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
@@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
if (FrameIndexSDNode *FIN =
- dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
// Constant offset from frame ref.
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
@@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
// (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Src = mN->getSrcValue();
- }
- else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
Src = mN->getSrcValue();
}
if (!Src)
@@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
-bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
+ default:
+ return true;
+ case 'm': // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
@@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
// pattern matcher inserts a bunch of IMOVi8rr to convert
// the imm to i8imm, and this causes instruction selection
// to fail.
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
- SDValue &Retval) {
- if (!(N.getOpcode() == ISD::UNDEF) &&
- !(N.getOpcode() == ISD::Constant))
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
return false;
if (N.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 4ec924117a..70e8e46429 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -64,11 +64,10 @@ public:
const NVPTXSubtarget &Subtarget;
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ virtual bool SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
private:
- // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
@@ -99,7 +98,6 @@ private:
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
-
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e9a9fbfd04..6e01a5a820 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
@@ -44,14 +43,14 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
-static cl::opt<bool>
-sched4reg("nvptx-sched4reg",
- cl::desc("NVPTX Specific: schedule for register pressue"),
- cl::init(false));
+static cl::opt<bool> sched4reg(
+ "nvptx-sched4reg",
+ cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
- default: return false;
+ default:
+ return false;
case MVT::v2i8:
case MVT::v4i8:
case MVT::v2i16:
@@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) {
case MVT::v2f32:
case MVT::v4f32:
case MVT::v2f64:
- return true;
+ return true;
}
}
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
-: TargetLowering(TM, new NVPTXTargetObjectFile()),
- nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// Always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather than generating calls to memset, memcpy, or memmove.
- MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
- MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
- MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
// Operations not directly supported by NVPTX.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i8, Expand);
- setOperationAction(ISD::BR_CC, MVT::i16, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (nvptxSubtarget.hasROT64()) {
- setOperationAction(ISD::ROTL , MVT::i64, Legal);
- setOperationAction(ISD::ROTR , MVT::i64, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i64, Expand);
- setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
if (nvptxSubtarget.hasROT32()) {
- setOperationAction(ISD::ROTL , MVT::i32, Legal);
- setOperationAction(ISD::ROTR , MVT::i32, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i32, Expand);
- setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
}
- setOperationAction(ISD::ROTL , MVT::i16, Expand);
- setOperationAction(ISD::ROTR , MVT::i16, Expand);
- setOperationAction(ISD::ROTL , MVT::i8, Expand);
- setOperationAction(ISD::ROTR , MVT::i8, Expand);
- setOperationAction(ISD::BSWAP , MVT::i16, Expand);
- setOperationAction(ISD::BSWAP , MVT::i32, Expand);
- setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
// Indirect branch is not supported.
// This also disables Jump Table creation.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
- setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// We want to legalize constant related memmove and memcopy
// intrinsics.
@@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::i8, MVT::i1, Expand);
// This is legal in NVPTX
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
// TRAP can be lowered to PTX trap
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
// Register custom handling for vector loads/stores
- for (int i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
computeRegisterProperties();
}
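// A sketch of what Expand means for the rotates marked above -- this is the
// generic legalizer rewrite, not NVPTX-specific code. Without a native
// rotate (no hasROT32/hasROT64), the legalizer turns
//   rotl i32 %x, %n
// into shifts plus an OR, roughly:
//   (shl %x, %n) | (srl %x, (32 - %n))
// When the subtarget reports Legal, the rotate survives for isel to match.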
-
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
- case NVPTXISD::CALL: return "NVPTXISD::CALL";
- case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
- case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
- case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
- case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
+ default:
+ return 0;
+ case NVPTXISD::CALL:
+ return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG:
+ return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper:
+ return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin:
+ return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam:
+ return "NVPTXISD::DeclareParam";
case NVPTXISD::DeclareScalarParam:
return "NVPTXISD::DeclareScalarParam";
- case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
- case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
- case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
- case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
- case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
- case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
- case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
- case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
- case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
- case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
- case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
- case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
- case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
- case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
- case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
- case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
- case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
- case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
- case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
- case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
- case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
- case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
- case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
- case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
- case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
- case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
- case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
- case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
- case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
- case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
- case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
- case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
+ case NVPTXISD::DeclareRet:
+ return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam:
+ return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall:
+ return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam:
+ return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam:
+ return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32:
+ return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32:
+ return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam:
+ return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin:
+ return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg:
+ return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg:
+ return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd:
+ return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid:
+ return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal:
+ return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol:
+ return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype:
+ return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam:
+ return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval:
+ return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval:
+ return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval:
+ return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam:
+ return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN:
+ return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin:
+ return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd:
+ return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2:
+ return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4:
+ return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2:
+ return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4:
+ return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2:
+ return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4:
+ return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2:
+ return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4:
+ return "NVPTXISD::StoreV4";
}
}
@@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
-std::string NVPTXTargetLowering::getPrototype(Type *retTy,
- const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment) const {
+std::string NVPTXTargetLowering::getPrototype(
+ Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
@@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
- }
- else {
+ if (size < 32)
+ size = 32;
+ } else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " _";
- }
- else if (isa<PointerType>(retTy))
- O << ".param .b" << getPointerTy().getSizeInBits()
- << " _";
+ } else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits() << " _";
else {
if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << totalsz << "]";
- }
- else {
- assert(false &&
- "Unknown return type");
+ O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ } else {
+ assert(false && "Unknown return type");
}
}
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " _";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
bool first = true;
MVT thePointerTy = getPointerTy();
- for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
const Type *Ty = Args[i].Ty;
if (!first) {
O << ", ";
@@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
continue;
}
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI) {
unsigned align = Outs[i].Flags.getByValAlign();
unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
- O << ".param .align " << align
- << " .b8 ";
+ O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
continue;
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " ";
O << "_";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
}
- if (i<e-1)
+ if (i < e - 1)
O << ", ";
}
continue;
@@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
return O.str();
}
-
-SDValue
-NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- ArgListTy &Args = CLI.Args;
- Type *retTy = CLI.RetTy;
- ImmutableCallSite *CS = CLI.CS;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ ArgListTy &Args = CLI.Args;
+ Type *retTy = CLI.RetTy;
+ ImmutableCallSite *CS = CLI.CS;
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true));
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
SDValue InFlag = Chain.getValue(1);
assert((Outs.size() == Args.size()) &&
@@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned paramCount = 0;
// Declare the .params or .regs needed to pass values
// to the function.
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT VT = Outs[i].VT;
if (Outs[i].Flags.isByVal() == false) {
@@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isABI)
isReg = 0;
unsigned sz = VT.getSizeInBits();
- if (VT.isInteger() && (sz < 32)) sz = 32;
+ if (VT.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(isReg, MVT::i32),
- InFlag };
+ DAG.getConstant(isReg, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+ DAG.getConstant(0, MVT::i32), OutVals[i],
+ InFlag };
unsigned opcode = NVPTXISD::StoreParam;
if (isReg)
@@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
- assert(PTy &&
- "Type of a byval parameter should be pointer");
+ assert(PTy && "Type of a byval parameter should be pointer");
ComputeValueVTs(*this, PTy->getElementType(), vtparts);
if (isABI) {
@@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The ByValAlign in the Outs[i].Flags is always set at this point, so we
// don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+ InFlag
+ };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- theVal, InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, 5);
InFlag = Chain.getValue(1);
- curOffset += sz/8;
+ curOffset += sz / 8;
}
}
++paramCount;
@@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Non-abi, struct or vector
// Declare a bunch of .reg .b<size> .param<n>
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(0, MVT::i32), theVal,
@@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
// individual .reg .b<size> func_retval<0..> for non ABI
unsigned resultsz = 0;
- for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = resvtparts[i];
if (resvtparts[i].isVector()) {
elems = resvtparts[i].getVectorNumElements();
elemtype = resvtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (isABI == false) {
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
- }
- else {
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ } else {
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
}
if (isABI == false) {
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (isABI) {
if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
- retTy->isPointerTy() ) {
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32 bits wide
if (resultsz < 32)
resultsz = 32;
@@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
- }
- else {
+ } else {
if (Func) { // direct call
if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
@@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
}
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
- MVT::i32),
- DAG.getConstant(resultsz/8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = { Chain,
+ DAG.getConstant(retAlignment, MVT::i32),
+ DAG.getConstant(resultsz / 8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
@@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// INLINEASM SDNode.
SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
- const char *asmstr = nvTM->getManagedStrPool()->
- getManagedString(proto_string.c_str())->c_str();
- SDValue InlineAsmOps[] = { Chain,
- DAG.getTargetExternalSymbol(asmstr,
- getPointerTy()),
- DAG.getMDNode(0),
- DAG.getTargetConstant(0, MVT::i32), InFlag };
+ const char *asmstr = nvTM->getManagedStrPool()
+ ->getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = {
+ Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+ DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+ };
Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrintCallOps[] = { Chain,
- DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
- : retCount, MVT::i32),
- InFlag };
- Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
- PrintCallVTs, PrintCallOps, 3);
+ SDValue PrintCallOps[] = {
+ Chain,
+ DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+ dl, PrintCallVTs, PrintCallOps, 3);
InFlag = Chain.getValue(1);
// Ops to print out the function name
@@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallArgBeginOps, 2);
InFlag = Chain.getValue(1);
- for (unsigned i=0, e=paramCount; i!=e; ++i) {
+ for (unsigned i = 0, e = paramCount; i != e; ++i) {
unsigned opcode;
- if (i==(e-1))
+ if (i == (e - 1))
opcode = NVPTXISD::LastCallArg;
else
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(i, MVT::i32),
- InFlag };
+ DAG.getConstant(i, MVT::i32), InFlag };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
InFlag };
- Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
- 3);
+ Chain =
+ DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain,
- DAG.getConstant(uniqueCallSite, MVT::i32),
+ SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
InFlag = Chain.getValue(1);
@@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Ins.size() > 0) {
if (isABI) {
unsigned resoffset = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = Ins[i].VT.getSizeInBits();
- if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
+ if (Ins[i].VT.isInteger() && (sz < 8))
+ sz = 8;
EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
- SDValue LoadRetOps[] = {
- Chain,
- DAG.getConstant(1, MVT::i32),
- DAG.getConstant(resoffset, MVT::i32),
- InFlag
- };
+ SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resoffset, MVT::i32), InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
LoadRetOps, array_lengthof(LoadRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
InVals.push_back(retval);
- resoffset += sz/8;
+ resoffset += sz / 8;
}
- }
- else {
+ } else {
SmallVector<EVT, 16> resvtparts;
ComputeValueVTs(*this, retTy, resvtparts);
assert(Ins.size() == resvtparts.size() &&
"Unexpected number of return values in non-ABI case");
unsigned paramNum = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(EVT(Ins[i].VT) == resvtparts[i] &&
"Unexpected EVT type in non-ABI case");
unsigned numelems = 1;
@@ -754,14 +773,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
elemtype = Ins[i].VT.getVectorElementType();
}
std::vector<SDValue> tempRetVals;
- for (unsigned j=0; j<numelems; ++j) {
+ for (unsigned j = 0; j < numelems; ++j) {
EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
- SDValue MoveRetOps[] = {
- Chain,
- DAG.getConstant(0, MVT::i32),
- DAG.getConstant(paramNum, MVT::i32),
- InFlag
- };
+ SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(paramNum, MVT::i32),
+ InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
MoveRetOps, array_lengthof(MoveRetOps));
Chain = retval.getValue(1);
@@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
}
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true),
- DAG.getIntPtrConstant(uniqueCallSite+1, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, true),
InFlag);
uniqueCallSite++;
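A standalone sketch of the resoffset bookkeeping in the isABI return-value loop above. The helper name advanceOffset and the sample sizes are illustrative only, not part of the patch:

#include <cassert>
#include <cstdio>

// Mirrors the LowerCall return loop: integer types narrower than 8 bits
// still occupy a full byte in the return-value area before the offset
// advances by the type's store size.
static unsigned advanceOffset(unsigned offset, unsigned sizeInBits,
                              bool isInteger) {
  if (isInteger && sizeInBits < 8)
    sizeInBits = 8; // i1..i7 round up to one byte
  return offset + sizeInBits / 8;
}

int main() {
  unsigned resoffset = 0;
  resoffset = advanceOffset(resoffset, 1, true);   // i1  -> 1 byte
  resoffset = advanceOffset(resoffset, 32, true);  // i32 -> 4 bytes
  resoffset = advanceOffset(resoffset, 64, false); // f64 -> 8 bytes
  assert(resoffset == 13);
  std::printf("return area: %u bytes\n", resoffset);
}

The same byte accounting drives the vector-parameter loads in LowerFormalArguments and the sizesofar counter in LowerReturn further down.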
@@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector, just as LegalizeOp() does in LLVM 2.5
// (see the sketch after this function).
-SDValue NVPTXTargetLowering::
-LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
SmallVector<SDValue, 8> Ops;
unsigned NumOperands = Node->getNumOperands();
- for (unsigned i=0; i < NumOperands; ++i) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
SDValue SubOp = Node->getOperand(i);
EVT VVT = SubOp.getNode()->getValueType(0);
EVT EltVT = VVT.getVectorElementType();
unsigned NumSubElem = VVT.getVectorNumElements();
- for (unsigned j=0; j < NumSubElem; ++j) {
+ for (unsigned j = 0; j < NumSubElem; ++j) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
DAG.getIntPtrConstant(j)));
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+ Ops.size());
}
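For reference, a host-side analogue of the expansion described in the comment above, assuming two <2 x float> operands. The real code builds EXTRACT_VECTOR_ELT and BUILD_VECTOR nodes rather than copying values, and concat2x2 is a made-up name:

#include <array>
#include <cstdio>

// Concatenate two <2 x float> values element by element: pull every element
// out of each operand (the EXTRACT_VECTOR_ELT step), then assemble the
// result (the BUILD_VECTOR step). No stack round-trip is involved.
static std::array<float, 4> concat2x2(const std::array<float, 2> &A,
                                      const std::array<float, 2> &B) {
  std::array<float, 4> R;
  unsigned Idx = 0;
  for (float V : A)
    R[Idx++] = V;
  for (float V : B)
    R[Idx++] = V;
  return R;
}

int main() {
  std::array<float, 4> R = concat2x2({ 1.0f, 2.0f }, { 3.0f, 4.0f });
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]);
}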
-SDValue NVPTXTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- case ISD::RETURNADDR: return SDValue();
- case ISD::FRAMEADDR: return SDValue();
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return Op;
+ case ISD::RETURNADDR:
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return SDValue();
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return Op;
case ISD::BUILD_VECTOR:
case ISD::EXTRACT_SUBVECTOR:
return Op;
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG);
+ case ISD::LOAD:
+ return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
-
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
@@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
-SDValue NVPTXTargetLowering::
-LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
- assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
- SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- LD->getAlignment());
+ SDValue newLD =
+ DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
- SDValue Ops[] = {result, LD->getChain()};
+ SDValue Ops[] = { result, LD->getChain() };
return DAG.getMergeValues(Ops, 2, dl);
}
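What the emitted nodes compute, as a host-side sketch; loadI1 is a hypothetical helper, and the layout assumption (one addressable byte per i1) matches the comment above:

#include <cstdint>
#include <cstdio>

// ld i8 plus trunc-to-i1: read the whole byte, keep only the low bit.
static bool loadI1(const uint8_t *Addr) {
  uint8_t Wide = *Addr; // the widened i8 load
  return Wide & 1;      // the TRUNCATE back to i1
}

int main() {
  uint8_t Byte = 0x03;
  std::printf("%d\n", loadI1(&Byte) ? 1 : 0); // prints 1
}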
@@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
if (!ValVT.isSimple())
return SDValue();
switch (ValVT.getSimpleVT().SimpleTy) {
- default: return SDValue();
+ default:
+ return SDValue();
case MVT::v2i8:
case MVT::v2i16:
case MVT::v2i32:
@@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
NeedExt = true;
switch (NumElts) {
- default: return SDValue();
+ default:
+ return SDValue();
case 2:
Opcode = NVPTXISD::StoreV2;
break;
@@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
- SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
- DAG.getVTList(MVT::Other), &Ops[0],
- Ops.size(), MemSD->getMemoryVT(),
- MemSD->getMemOperand());
-
+ SDValue NewSt = DAG.getMemIntrinsicNode(
+ Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
//return DCI.CombineTo(N, NewSt, true);
return NewSt;
@@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// =>
// v1 = zxt v to i8
// st i8, addr
-SDValue NVPTXTargetLowering::
-LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i8, Tmp3);
- SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
return Result;
}
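The store direction is the mirror image, sketched the same way (storeI1 is a hypothetical helper):

#include <cstdint>

// zext-to-i8 plus st i8: widen the predicate to a byte, store the byte.
// Pairs with the loadI1 sketch after LowerLOADi1 above.
static void storeI1(uint8_t *Addr, bool V) {
  *Addr = V ? 1 : 0; // the ZERO_EXTEND, then the i8 store
}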
-
-SDValue
-NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
- EVT v) const {
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+ int idx, EVT v) const {
std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
std::stringstream suffix;
suffix << idx;
@@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return getExtSymb(DAG, ".PARAM", idx, v);
}
-SDValue
-NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
return getExtSymb(DAG, ".HLPPARAM", idx);
}
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
- static const char *const specialTypes[] = {
- "struct._image2d_t",
- "struct._image3d_t",
- "struct._sampler_t"
- };
+ static const char *const specialTypes[] = { "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t" };
const Type *Ty = arg->getType();
const PointerType *PTy = dyn_cast<PointerType>(Ty);
@@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
return false;
}
-SDValue
-NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout *TD = getDataLayout();
@@ -1054,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
+ I != E; ++I) {
theArgs.push_back(I);
argTypes.push_back(I->getType());
}
- assert(argTypes.size() == Ins.size() &&
- "Ins types and function types did not match");
+ //assert(argTypes.size() == Ins.size() &&
+ // "Ins types and function types did not match");
int idx = 0;
- for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+ for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
Type *Ty = argTypes[i];
EVT ObjectVT = getValueType(Ty);
- assert(ObjectVT == Ins[i].VT &&
- "Ins type did not match function type");
+ //assert(ObjectVT == Ins[i].VT &&
+ // "Ins type did not match function type");
// If the kernel argument is image*_t or sampler_t, convert it to
// an i32 constant holding the parameter position. This can later be
// matched in the AsmPrinter to output the correct mangled name.
- if (isImageOrSamplerVal(theArgs[i],
- (theArgs[i]->getParent() ?
- theArgs[i]->getParent()->getParent() : 0))) {
+ if (isImageOrSamplerVal(
+ theArgs[i],
+ (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+ : 0))) {
assert(isKernel && "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+ InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
continue;
}
if (theArgs[i]->use_empty()) {
// argument is dead
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ if (ObjectVT.isVector()) {
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+ }
+ } else {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ }
continue;
}
@@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
+ if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (ObjectVT.isVector()) {
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned Offset = 0;
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue B = DAG.getIntPtrConstant(Offset);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ //getParamSymbol(DAG, idx, EltVT),
+ //DAG.getConstant(Offset, getPointerTy()));
+ A, B);
+ Value *SrcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+ SDValue Ld = DAG.getLoad(
+ EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+ false,
+ TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ Offset += EltVT.getStoreSizeInBits() / 8;
+ InVals.push_back(Ld);
+ }
+ continue;
+ }
+
// A plain scalar.
if (isABI || isKernel) {
// If ABI, load from the param symbol
SDValue Arg = getParamSymbol(DAG, idx);
// Conjure up a value that we can get the address space from.
// FIXME: Using a constant here is a hack.
- Value *srcValue = Constant::getNullValue(PointerType::get(
- ObjectVT.getTypeForEVT(F->getContext()),
- llvm::ADDRESS_SPACE_PARAM));
- SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
- F->getContext())));
+ Value *srcValue = Constant::getNullValue(
+ PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(
+ ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
- }
- else {
+ } else {
// If no ABI, just move the param symbol
SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
}
continue;
@@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
if (isKernel)
InVals.push_back(p);
else {
- SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
- p);
+ SDValue p2 = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
InVals.push_back(p2);
}
} else {
// Have to move a set of param symbols to registers and
// store them locally and return the local pointer in InVals
const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
- assert(elemPtrType &&
- "Byval parameter should be a pointer type");
+ assert(elemPtrType && "Byval parameter should be a pointer type");
Type *elemType = elemPtrType->getElementType();
// Compute the constituent parts
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
unsigned totalsize = 0;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
totalsize += vtparts[j].getStoreSizeInBits();
- SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
- CreateStackObject(totalsize/8, 16, false),
- getPointerTy());
+ SDValue localcopy = DAG.getFrameIndex(
+ MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+ getPointerTy());
unsigned sizesofar = 0;
std::vector<SDValue> theChains;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned numElems = 1;
- if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
- for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+ if (vtparts[j].isVector())
+ numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k = 0, ke = numElems; k != ke; ++k) {
EVT tmpvt = vtparts[j];
- if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+ if (tmpvt.isVector())
+ tmpvt = tmpvt.getVectorElementType();
SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
getParamSymbol(DAG, idx, tmpvt));
- SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
- DAG.getConstant(sizesofar, getPointerTy()));
- theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
- MachinePointerInfo(), false, false, 0));
- sizesofar += tmpvt.getStoreSizeInBits()/8;
+ SDValue addr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(
+ Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
//}
if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size()));
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+ OutChains.size()));
return Chain;
}
-SDValue
-NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
unsigned sizesofar = 0;
unsigned idx = 0;
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT theValType = theVal.getValueType();
unsigned numElems = 1;
- if (theValType.isVector()) numElems = theValType.getVectorNumElements();
- for (unsigned j=0,je=numElems; j!=je; ++j) {
+ if (theValType.isVector())
+ numElems = theValType.getVectorNumElements();
+ for (unsigned j = 0, je = numElems; j != je; ++j) {
SDValue tmpval = theVal;
if (theValType.isVector())
tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- theValType.getVectorElementType(),
- tmpval, DAG.getIntPtrConstant(j));
- Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
- dl, MVT::Other,
- Chain,
- DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ theValType.getVectorElementType(), tmpval,
+ DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(
+ isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+ MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
tmpval);
if (theValType.isVector())
- sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
else
- sizesofar += theValType.getStoreSizeInBits()/8;
+ sizesofar += theValType.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
-void
-NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const
-{
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
if (Constraint.length() > 1)
return;
else
@@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// NVPTX supports vectors of legal types of any length in intrinsics because
// the NVPTX-specific type legalizer will legalize them to the PTX-supported
// length.
-bool
-NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
if (isTypeLegal(VT))
return true;
if (VT.isVector()) {
@@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
-
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in the
// "Value" type of the destination pointer, in particular its address space.
-bool
-NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
- unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+ IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
@@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool
-NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
@@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
switch (AM.Scale) {
- case 0: // "r", "r+i" or "i" is allowed
+ case 0: // "r", "r+i" or "i" is allowed
break;
case 1:
- if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
return false;
// Otherwise we have r+i.
break;
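The same rules, restated as a self-contained predicate. AddrModeSketch is a stand-in for TargetLowering::AddrMode, and this is a sketch of the Scale handling only; the full function also handles the global-address cases elided from this hunk:

// Stand-in for TargetLowering::AddrMode: base register + offset + scaled
// index register.
struct AddrModeSketch {
  bool HasBaseReg;
  long BaseOffs;
  int Scale;
};

// PTX-style legality: "r", "r+i" and "i" are fine; "r+r" (Scale == 1 with a
// base register) and any larger scale are not expressible.
static bool isLegalPTXAddrMode(const AddrModeSketch &AM) {
  if (AM.Scale == 0)
    return true; // r, r+i or i
  if (AM.Scale == 1)
    return !AM.HasBaseReg; // r+i through the scaled register, but no r+r
  return false; // scaled addressing does not exist in PTX
}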
@@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-
-std::pair<unsigned, const TargetRegisterClass*>
+std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
@@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-
-
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
@@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
- SmallVectorImpl<SDValue>& Results) {
+ SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
DebugLoc DL = N->getDebugLoc();
@@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
// but I'm leaving that as a TODO for now.
assert(ResVT.isSimple() && "Can only handle simple types");
switch (ResVT.getSimpleVT().SimpleTy) {
- default: return;
+ default:
+ return;
case MVT::v2i8:
case MVT::v2i16:
case MVT::v2i32:
@@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SDVTList LdResVTs;
switch (NumElts) {
- default: return;
+ default:
+ return;
case 2:
Opcode = NVPTXISD::LoadV2;
LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
@@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SDValue LoadChain = NewLD.getValue(NumElts);
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
}
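A value-level sketch of the same shape, assuming a v4f32 load. The real code emits a single multi-result LoadV4 node rather than four separate loads, and loadV4 here is a made-up name:

#include <array>
#include <cstdio>
#include <cstring>

// Fetch each element separately, then rebuild the vector value from the
// scalar results (the BUILD_VECTOR step above).
static std::array<float, 4> loadV4(const unsigned char *Mem) {
  std::array<float, 4> ScalarRes;
  for (unsigned i = 0; i < 4; ++i)
    std::memcpy(&ScalarRes[i], Mem + i * sizeof(float), sizeof(float));
  return ScalarRes;
}

int main() {
  float Data[4] = { 1, 2, 3, 4 };
  std::array<float, 4> V = loadV4(reinterpret_cast<unsigned char *>(Data));
  std::printf("%g %g %g %g\n", V[0], V[1], V[2], V[3]);
}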
-static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
- SelectionDAG &DAG,
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
SDValue Chain = N->getOperand(0);
SDValue Intrin = N->getOperand(1);
@@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
// Get the intrinsic ID
unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
- switch(IntrinNo) {
- default: return;
+ switch (IntrinNo) {
+ default:
+ return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
@@ -1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
SDVTList LdResVTs;
switch (NumElts) {
- default: return;
+ default:
+ return;
case 2:
- switch(IntrinNo) {
- default: return;
+ switch (IntrinNo) {
+ default:
+ return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
@@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
break;
case 4: {
- switch(IntrinNo) {
- default: return;
+ switch (IntrinNo) {
+ default:
+ return;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p:
@@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
// Copy regular operands
OtherOps.push_back(Chain); // Chain
- // Skip operand 1 (intrinsic ID)
- // Others
+ // Skip operand 1 (intrinsic ID)
+ // Others
for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
- SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
- OtherOps.size(), MemSD->getMemoryVT(),
- MemSD->getMemOperand());
+ SDValue NewLD = DAG.getMemIntrinsicNode(
+ Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Res = NewLD.getValue(i);
if (NeedTrunc)
- Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ Res =
+ DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
ScalarRes.push_back(Res);
}
SDValue LoadChain = NewLD.getValue(NumElts);
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
@@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
// We make sure the memory type is i8, which will be used during isel
// to select the proper instruction.
- SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
- LdResVTs, &Ops[0],
- Ops.size(), MVT::i8,
- MemSD->getMemOperand());
+ SDValue NewLD =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8, MemSD->getMemOperand());
Results.push_back(NewLD.getValue(0));
Results.push_back(NewLD.getValue(1));
@@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
}
}
-void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
+void NVPTXTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
- default: report_fatal_error("Unhandled custom legalization");
+ default:
+ report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
ReplaceLoadVector(N, DAG, Results);
return;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 14afc148cb..3cd49d38af 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -87,7 +87,7 @@ public:
bool isTypeSupportedInIntrinsic(MVT VT) const;
- bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
@@ -107,14 +107,13 @@ public:
}
ConstraintType getConstraintType(const std::string &Constraint) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- virtual SDValue
- LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
@@ -136,17 +135,15 @@ public:
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::i32;
- }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual bool shouldSplitVectorElementType(EVT VT) const;
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
- MVT::i32) const;
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+ EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
@@ -159,8 +156,7 @@ private:
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 9e73d80c28..33a63c26f4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -23,61 +23,55 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cstdio>
-
using namespace llvm;
// FIXME: Add subtarget support to this constructor.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
-: NVPTXGenInstrInfo(),
- TM(tm),
- RegInfo(*this, *TM.getSubtargetImpl()) {}
-
+ : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
-void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+void NVPTXInstrInfo::copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
NVPTX::Int32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
- NVPTX::Int8RegsRegClass.contains(SrcReg))
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
- NVPTX::Int1RegsRegClass.contains(SrcReg))
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
- NVPTX::Float32RegsRegClass.contains(SrcReg))
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
- NVPTX::Int16RegsRegClass.contains(SrcReg))
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
- NVPTX::Int64RegsRegClass.contains(SrcReg))
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
- NVPTX::Float64RegsRegClass.contains(SrcReg))
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}
}
-bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const {
// Look for the appropriate part of TSFlags
bool isMove = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
- NVPTX::SimpleMoveShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
isMove = (TSFlags == 1);
if (isMove) {
@@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
return false;
}
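The mask-and-shift idiom used here and by the isLoadInstr/isStoreInstr predicates below, in isolation. The mask and shift values in main are invented for the demonstration, not the real NVPTX::SimpleMoveMask encoding:

#include <cassert>

// Extract a bitfield from a flags word: mask off the field, then shift it
// down to bit zero.
static unsigned extractField(unsigned Flags, unsigned Mask, unsigned Shift) {
  return (Flags & Mask) >> Shift;
}

int main() {
  // Suppose the "simple move" bit lived at bit 4.
  const unsigned Mask = 0x10, Shift = 4;
  assert(extractField(0x10, Mask, Shift) == 1);
  assert(extractField(0x0f, Mask, Shift) == 0);
  return 0;
}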
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
-{
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- default: return false;
+ default:
+ return false;
case NVPTX::INT_PTX_SREG_NTID_X:
case NVPTX::INT_PTX_SREG_NTID_Y:
case NVPTX::INT_PTX_SREG_NTID_Z:
@@ -115,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
}
}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
- NVPTX::isLoadShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
isLoad = (TSFlags == 1);
if (isLoad)
AddrSpace = getLdStCodeAddrSpace(MI);
@@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isStore = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
- NVPTX::isStoreShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
isStore = (TSFlags == 1);
if (isStore)
AddrSpace = getLdStCodeAddrSpace(MI);
return isStore;
}
-
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
unsigned addrspace = 0;
if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
@@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
return true;
}
-
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
-bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool NVPTXInstrInfo::AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineInstr *SecondLastInst = I;
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
return true;
// If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
LastInst->getOpcode() == NVPTX::GOTO) {
- TBB = SecondLastInst->getOperand(1).getMBB();
+ TBB = SecondLastInst->getOperand(1).getMBB();
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
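The case analysis, condensed into a standalone classifier. Opcodes are reduced to a small enum, targets and conditions are dropped, and several of the real function's cases are folded together, so treat this as a sketch of the shape rather than the TargetInstrInfo contract:

enum TermKind { TermNone, TermGoto, TermCBranch, TermOther };

// Returns false when the block is understood (the out-params say which
// branches exist) and true when AnalyzeBranch would give up.
static bool classifyBranch(TermKind SecondLast, TermKind Last, bool &HasCond,
                           bool &HasUncond) {
  HasCond = HasUncond = false;
  if (Last == TermNone)
    return false; // no terminators: falls into the next block
  if (SecondLast == TermNone) { // exactly one terminator
    if (Last == TermGoto) { HasUncond = true; return false; }
    if (Last == TermCBranch) { HasCond = true; return false; }
    return true; // some other terminator: give up
  }
  if (SecondLast == TermCBranch && Last == TermGoto) {
    HasCond = HasUncond = true; // conditional branch with explicit GOTO
    return false;
  }
  return true; // three terminators or an unrecognized pair
}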
@@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
+ if (I == MBB.begin())
+ return 0;
--I;
if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
return 0;
@@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin()) return 1;
+ if (I == MBB.begin())
+ return 1;
--I;
if (I->getOpcode() != NVPTX::CBranch)
return 1;
@@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned
-NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
+unsigned NVPTXInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// One-way branch.
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch
+ if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
- else // Conditional branch
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+ .addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7b8e218b05..b1972e9b72 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -23,8 +23,7 @@
namespace llvm {
-class NVPTXInstrInfo : public NVPTXGenInstrInfo
-{
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
public:
@@ -50,30 +49,26 @@ public:
* const TargetRegisterClass *RC) const;
*/
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const ;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+ virtual void copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isReadSpecialReg(MachineInstr &MI) const;
- virtual bool CanTailMerge(const MachineInstr *MI) const ;
+ virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ virtual bool AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ virtual unsigned InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
- return MI.getOperand(2).getImm();
+ return MI.getOperand(2).getImm();
}
};
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f7fa7aa61d..7c257b4c6a 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -25,18 +25,15 @@
using namespace llvm;
-namespace llvm {
-FunctionPass *createLowerAggrCopies();
-}
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to a loop
-static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
- Value *dstAddr, Value *len,
- //unsigned numLoads,
- bool srcVolatile, bool dstVolatile,
- LLVMContext &Context, Function &F) {
+static void convertTransferToLoop(
+ Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+ //unsigned numLoads,
+ bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
BasicBlock *origBB = splitAt->getParent();
@@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS =
- dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
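The loop convertTransferToLoop builds out of IR, shown as its plain C++ equivalent; a sketch only, with the volatility flags and IRBuilder plumbing omitted and copyLoop a made-up name:

// One byte per iteration; the induction variable plays the role of the PHI
// node that the pass creates in loopBB.
static void copyLoop(unsigned char *dstAddr, const unsigned char *srcAddr,
                     unsigned long len) {
  for (unsigned long ind = 0; ind < len; ++ind)
    dstAddr[ind] = srcAddr[ind];
}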
@@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast the pointer to the type of the value being stored
- dstAddr = builder.CreateBitCast(dstAddr,
- PointerType::get(val->getType(), dstAS));
+ dstAddr =
+ builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
IRBuilder<> loop(loopBB);
PHINode *ind = loop.CreatePHI(len->getType(), 0);
@@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
//BasicBlock *bb = BI;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+ ++II) {
+ if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false) continue;
+ if (load->hasOneUse() == false)
+ continue;
- if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+ if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ continue;
User *use = *(load->use_begin());
- if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+ if (StoreInst *store = dyn_cast<StoreInst>(use)) {
if (store->getOperand(0) != load) //getValueOperand
- continue;
+ continue;
aggrLoads.push_back(load);
}
- } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+ } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
Value *len = intr->getLength();
// If the number of elements being copied is greater
// than MaxAggrCopySize, lower it to a loop
- if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemcpys.push_back(intr);
}
@@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// turn a variable-length memcpy/memmove into a loop
aggrMemcpys.push_back(intr);
}
- } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+ } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
Value *len = memsetintr->getLength();
- if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemsets.push_back(memsetintr);
}
@@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
- && (aggrMemsets.size() == 0)) return false;
+ if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+ (aggrMemsets.size() == 0))
+ return false;
//
// Do the transformation of an aggr load/copy/set to a loop
diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h
index b4a4dbce98..a95c16b1e6 100644
--- a/lib/Target/NVPTX/NVPTXNumRegisters.h
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -11,10 +11,6 @@
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = 396;
-
-}
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 350a2c5551..282465359b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -23,69 +23,54 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
-
using namespace llvm;
-namespace llvm
-{
-std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return ".f32";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return ".s64";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return ".s32";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return ".s16";
}
- // Int8Regs become 16-bit registers in PTX
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ // Int8Regs become 16-bit registers in PTX
+ else if (RC == &NVPTX::Int8RegsRegClass) {
return ".s16";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
}
-std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return "%f";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return "%fd";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return "%rd";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return "%r";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return "%rs";
- }
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ } else if (RC == &NVPTX::Int8RegsRegClass) {
return "%rc";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return "%p";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
@@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0),
- Is64Bit(st.is64Bit()) {}
+ : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
-const uint16_t* NVPTXRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
// NVPTX Callee Saved Reg Classes
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
return CalleeSavedRegClasses;
}
@@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void NVPTXRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
@@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
- MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-int NVPTXRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return 0;
}
@@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
-unsigned NVPTXRegisterInfo::getRARegister() const {
- return 0;
-}
-
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 69f73f213c..d406820661 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -17,7 +17,6 @@
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
bool Is64Bit;
// Hold strings that can be freed all together with NVPTXRegisterInfo
- ManagedStringPool ManagedStrPool;
+ ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const TargetInstrInfo &tii,
- const NVPTXSubtarget &st);
-
+ NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
//------------------------------------------------------
// NVPTX callee saved registers
- virtual const uint16_t*
+ virtual const uint16_t *
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
// NVPTX callee saved register classes
- virtual const TargetRegisterClass* const *
+ virtual const TargetRegisterClass *const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS=NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -74,11 +71,9 @@ public:
};
-
-std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
-std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
} // end namespace llvm
-
#endif
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index babe29500d..83dfe12089 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -21,9 +21,7 @@
using namespace llvm;
-namespace llvm {
-FunctionPass *createSplitBBatBarPass();
-}
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
char NVPTXSplitBBatBar::ID = 0;
@@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
// This interface will most likely not be necessary, because this pass will
// not be invoked by the driver, but will be used as a prerequisite to
// another pass.
-FunctionPass *llvm::createSplitBBatBarPass() {
- return new NVPTXSplitBBatBar();
-}
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7b62cce2c6..2dcd73dcff 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -22,27 +22,23 @@ using namespace llvm;
// Select Driver Interface
#include "llvm/Support/CommandLine.h"
namespace {
-cl::opt<NVPTX::DrvInterface>
-DriverInterface(cl::desc("Choose driver interface:"),
- cl::values(
- clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
- clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
- clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
- clEnumValEnd),
- cl::init(NVPTX::NVCL));
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+ cl::desc("Choose driver interface:"),
+ cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+ clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+ clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+ cl::init(NVPTX::NVCL));
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
-: NVPTXGenSubtargetInfo(TT, CPU, FS),
- Is64Bit(is64Bit),
- PTXVersion(0),
- SmVersion(10) {
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+ SmVersion(20) {
drvInterface = DriverInterface;
// Provide the default CPU if none was specified
- std::string defCPU = "sm_10";
+ std::string defCPU = "sm_20";
ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index beea77e38d..670077daaa 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -25,7 +25,7 @@
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-
+
std::string TargetName;
NVPTX::DrvInterface drvInterface;
bool Is64Bit;
@@ -61,13 +61,10 @@ public:
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }
- inline bool hasSWROT32() const {
- return true;
- }
- inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+ inline bool hasSWROT32() const { return true; }
+ inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
bool is64Bit() const { return Is64Bit; }
unsigned int getSmVersion() const { return SmVersion; }
@@ -96,4 +93,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cd765fa8cb..67ca6b58e5 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -45,9 +45,11 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-
using namespace llvm;
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
extern "C" void LLVMInitializeNVPTXTarget() {
// Register the target.
@@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() {
RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+ // FIXME: This pass is really intended to be invoked during IR optimization,
+ // but it's very NVPTX-specific.
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
- const TargetOptions& Options,
- Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
-: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit),
- DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
-/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
-}
-
-
+NVPTXTargetMachine::NVPTXTargetMachine(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(
+ *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
void NVPTXTargetMachine32::anchor() {}
-NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
-NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace llvm {
class NVPTXPassConfig : public TargetPassConfig {
public:
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
NVPTXTargetMachine &getNVPTXTargetMachine() const {
return getTM<NVPTXTargetMachine>();
@@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() {
- return false;
-}
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 1a732be1ad..5fbcf735b4 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
@@ -31,42 +30,40 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
- NVPTXSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- NVPTXInstrInfo InstrInfo;
- NVPTXTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
+ NVPTXSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
  // NVPTX does not have any call stack frame, but needs an NVPTX-specific
// FrameLowering class because TargetFrameLowering is abstract.
- NVPTXFrameLowering FrameLowering;
+ NVPTXFrameLowering FrameLowering;
  // Holds strings that can be freed together with the NVPTXTargetMachine
- ManagedStringPool ManagedStrPool;
+ ManagedStringPool ManagedStrPool;
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
public:
- NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OP,
- bool is64bit);
+ NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL;}
- virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+ virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const NVPTXRegisterInfo *getRegisterInfo() const {
return &(InstrInfo.getRegisterInfo());
}
virtual NVPTXTargetLowering *getTargetLowering() const {
- return const_cast<NVPTXTargetLowering*>(&TLInfo);
+ return const_cast<NVPTXTargetLowering *>(&TLInfo);
}
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
@@ -79,22 +76,19 @@ public:
//virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
ManagedStringPool *getManagedStrPool() const {
- return const_cast<ManagedStringPool*>(&ManagedStrPool);
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
// Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &,
- JITCodeEmitter &,
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
bool = true) {
return true;
}
// Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &,
- MCContext *&,
- raw_ostream &,
+ virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) {
return true;
}
@@ -119,7 +113,6 @@ public:
CodeGenOpt::Level OL);
};
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index b5698a2fc0..6ab0e08ad0 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -46,45 +46,43 @@ public:
}
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
- TextSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getText());
- DataSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getDataRel());
- BSSSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getBSS());
- ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getReadOnly());
+ TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+ DataSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+ ReadOnlySection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
- StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- LSDASection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
+ StaticCtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ StaticDtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ LSDASection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ EHFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfAbbrevSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfStrSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLocSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfARangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfRangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@@ -93,8 +91,7 @@ public:
virtual const MCSection *
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang,
- const TargetMachine &TM) const {
+ Mangler *Mang, const TargetMachine &TM) const {
return DataSection;
}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1ccc9f7c02..6786eb0224 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
ManagedStatic<per_module_annot_t> annotationCache;
-
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+ ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::isTexture(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a texture symbol");
return true;
}
@@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
bool llvm::isSurface(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a surface symbol");
return true;
}
@@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
bool llvm::isSampler(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a sampler symbol");
return true;
}
@@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findAllNVVMAnnotation(
+ func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
}
bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
}
bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
}
bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
}
bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
}
bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
}
bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
}
bool llvm::getMinCTASm(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
}
bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
- bool retval = llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
- x);
+ bool retval = llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
if (retval == false) {
// There is no NVVM metadata, check the calling convention
if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
else
return false;
}
- return (x==1);
+ return (x == 1);
}
bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
- bool retval = llvm::findAllNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
- Vs);
+ bool retval = llvm::findAllNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
if (retval == false)
return false;
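  // Each annotation value packs (parameter index << 16) | alignment; for
  // example (illustrative value, not taken from a test), 0x00030008 records
  // that parameter 3 is 8-byte aligned.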
- for (int i=0, e=Vs.size(); i<e; i++) {
+ for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
- if ( (v >> 16) == index ) {
- align = v & 0xFFFF;
+ if ((v >> 16) == index) {
+ align = v & 0xFFFF;
return true;
}
}
@@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
- for (int i=0, n = alignNode->getNumOperands();
- i<n; i++) {
+ for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
- if ( (v>>16) == index ) {
+ if ((v >> 16) == index) {
align = v & 0xFFFF;
return true;
}
- if ( (v>>16) > index ) {
+ if ((v >> 16) > index) {
return false;
}
}
@@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
// consider several special intrinsics in stripping pointer casts, and
// provide an option to ignore GEP indices to find only the base address,
// which can be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- bool ignore_GEP_indices) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
V = V->stripPointerCasts();
while (true) {
if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
// - ignore GEP indices to find only the base address, and
// - track PHINodes,
// which can be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
if (processed.find(V) != processed.end())
return NULL;
processed.insert(V);
@@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
return V;
}
-
// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 247e09b8bc..a208004297 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -23,8 +23,7 @@
#include <string>
#include <vector>
-namespace llvm
-{
+namespace llvm {
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
/// to pass into type construction of CallInst ctors. This turns a null
/// terminated list of pointers (or other value types) into a real live vector.
///
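/// A typical (illustrative) use, relying on the null-terminator convention:
///   std::vector<Value*> Ops = make_vector<Value*>(V1, V2, (Value*)0);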
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
va_list Args;
va_start(Args, A);
std::vector<T> Result;
@@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {
bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
index 6a0e5328f6..5f074b33a2 100644
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
namespace llvm {
-bool isParamLoad(const MachineInstr *MI)
-{
+bool isParamLoad(const MachineInstr *MI) {
if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
(MI->getOpcode() != NVPTX::LD_i64_avar))
return false;
@@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
return true;
}
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
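+
+// A quick LEB128 refresher (standard encoding, not specific to this code):
+// each output byte carries DIGIT_WIDTH (7) data bits and MORE_BYTES (0x80)
+// flags a continuation; e.g. 0x3F encodes as the single byte 0x3F, while
+// 0x80 encodes as the two bytes 0x80 0x01.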
-static int encode_leb128(uint64_t val, int *nbytes,
- char *space, int splen)
-{
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
char *a;
char *end = space + splen;
@@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
#undef DIGIT_WIDTH
#undef MORE_BYTES
-uint64_t encode_leb128(const char *str)
-{
- union { uint64_t x; char a[8]; } temp64;
+uint64_t encode_leb128(const char *str) {
+ union {
+ uint64_t x;
+ char a[8];
+ } temp64;
temp64.x = 0;
- for (unsigned i=0,e=strlen(str); i!=e; ++i)
- temp64.a[i] = str[e-1-i];
+ for (unsigned i = 0, e = strlen(str); i != e; ++i)
+ temp64.a[i] = str[e - 1 - i];
char encoded[16];
int nbytes;
int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
- (void)retval;
- assert(retval == 0 &&
- "Encoding to leb128 failed");
+ (void) retval;
+ assert(retval == 0 && "Encoding to leb128 failed");
assert(nbytes <= 8 &&
"Cannot support register names with leb128 encoding > 8 bytes");
temp64.x = 0;
- for (int i=0; i<nbytes; ++i)
+ for (int i = 0; i < nbytes; ++i)
temp64.a[i] = encoded[i];
return temp64.x;
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 0000000000..3bbd1a13da
--- /dev/null
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,193 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurrences of __nvvm_reflect("string") with an
+// integer based on the -nvvm-reflect-list string=<int> option given to this
+// pass.
+//
+//===----------------------------------------------------------------------===//
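+//
+// A minimal sketch of the rewrite (illustrative IR, not from a test case):
+//   %r = call i32 @__nvvm_reflect(i8* %p)   ; where %p points to "FOO"
+// becomes "i32 1" under -nvvm-reflect-list FOO=1; names that are not
+// listed are replaced with the default value 0.
+//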
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+ //std::map<std::string, int> VarMap;
+ StringMap<int> VarMap;
+ typedef std::map<std::string, int>::iterator VarMapIter;
+ Function *reflectFunction;
+
+public:
+ static char ID;
+ NVVMReflect() : ModulePass(ID) {
+ VarMap.clear();
+ reflectFunction = 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+ virtual bool runOnModule(Module &);
+
+ void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+ cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+                "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
+ false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"),
+ cl::desc("A list of string=num assignments, where num=0 or 1"),
+ cl::ValueRequired);
+
+/// This function does the same operation as Perl's split.
+/// For example, calling this with ("a=1,b=2,c=0", ",") will
+/// return ["a=1", "b=2", "c=0"] as a std::vector.
+static std::vector<std::string>
+Tokenize(const std::string &str, const std::string &delim) {
+ std::vector<std::string> tokens;
+
+ size_t p0 = 0, p1 = std::string::npos;
+ while (p0 != std::string::npos) {
+ p1 = str.find_first_of(delim, p0);
+ if (p1 != p0) {
+ std::string token = str.substr(p0, p1 - p0);
+ tokens.push_back(token);
+ }
+ p0 = str.find_first_not_of(delim, p1);
+ }
+
+ return tokens;
+}
+
+/// The command line can look as follows:
+/// -R a=1,b=2 -R c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
+/// ReflectList vector. First, each ReflectList[i] is split
+/// using "," as the delimiter. Then each part is split
+/// using "=" as the delimiter.
+void NVVMReflect::setVarMap() {
+ for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+ // DEBUG(dbgs() << "Option : " << ReflectList[i] << std::endl);
+ std::vector<std::string> nameValList = Tokenize(ReflectList[i], ",");
+ for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) {
+ std::vector<std::string> nameValPair = Tokenize(nameValList[j], "=");
+ assert(nameValPair.size() == 2 && "name=val expected");
+ std::stringstream valstream(nameValPair[1]);
+ int val;
+ valstream >> val;
+ assert((!(valstream.fail())) && "integer value expected");
+ VarMap[nameValPair[0]] = val;
+ }
+ }
+}
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+  // If the reflect function is not used, there will be
+  // no entry for it in the module.
+ if (reflectFunction == 0) {
+ return false;
+ }
+
+ // Validate _reflect function
+ assert(reflectFunction->isDeclaration() &&
+ "_reflect function should not have a body");
+ assert(reflectFunction->getReturnType()->isIntegerTy() &&
+ "_reflect's return type should be integer");
+
+ std::vector<Instruction *> toRemove;
+
+ // Go through the uses of reflectFunction in this Function.
+  // Each of them should be a CallInst with a ConstantArray argument.
+ // First validate that. If the c-string corresponding to the
+ // ConstantArray can be found successfully, see if it can be
+ // found in VarMap. If so, replace the uses of CallInst with the
+ // value found in VarMap. If not, replace the use with value 0.
+ for (Value::use_iterator iter = reflectFunction->use_begin(),
+ iterEnd = reflectFunction->use_end();
+ iter != iterEnd; ++iter) {
+ assert(isa<CallInst>(*iter) && "Only a call instruction can use _reflect");
+ CallInst *reflect = cast<CallInst>(*iter);
+
+ assert((reflect->getNumOperands() == 2) &&
+         "Only one operand expected for _reflect function");
+ // In cuda, we will have an extra constant-to-generic conversion of
+ // the string.
+ const Value *conv = reflect->getArgOperand(0);
+ assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+ const CallInst *convcall = cast<CallInst>(conv);
+ const Value *str = convcall->getArgOperand(0);
+ assert(isa<ConstantExpr>(str) &&
+ "Format of _reflect function not recognized");
+ const ConstantExpr *gep = cast<ConstantExpr>(str);
+
+ const Value *sym = gep->getOperand(0);
+ assert(isa<Constant>(sym) && "Format of _reflect function not recognized");
+
+ const Constant *symstr = cast<Constant>(sym);
+
+ assert(isa<ConstantDataSequential>(symstr->getOperand(0)) &&
+ "Format of _reflect function not recognized");
+
+ assert(cast<ConstantDataSequential>(symstr->getOperand(0))->isCString() &&
+ "Format of _reflect function not recognized");
+
+ std::string reflectArg =
+ cast<ConstantDataSequential>(symstr->getOperand(0))->getAsString();
+
+ reflectArg = reflectArg.substr(0, reflectArg.size() - 1);
+ // DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl);
+
+ int reflectVal = 0; // The default value is 0
+ if (VarMap.find(reflectArg) != VarMap.end()) {
+ reflectVal = VarMap[reflectArg];
+ }
+ reflect->replaceAllUsesWith(
+ ConstantInt::get(reflect->getType(), reflectVal));
+ toRemove.push_back(reflect);
+ }
+ if (toRemove.size() == 0)
+ return false;
+
+ for (unsigned i = 0, e = toRemove.size(); i != e; ++i)
+ toRemove[i]->eraseFromParent();
+ return true;
+}
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 6c801b875e..cc7d4dc5ec 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;
extern "C" void LLVMInitializeNVPTXTargetInfo() {
RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
- "NVIDIA PTX 32-bit");
+ "NVIDIA PTX 32-bit");
RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
- "NVIDIA PTX 64-bit");
+ "NVIDIA PTX 64-bit");
}
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index a7347efd78..45cc0b8b67 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -24,22 +24,21 @@ enum {
CLK_LUMINANCE = 0x10B9
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- ,
+ ,
CLK_Rx = 0x10BA,
CLK_RGx = 0x10BB,
CLK_RGBx = 0x10BC
#endif
};
-
typedef enum clk_channel_type {
// valid formats for float return types
- CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
- CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
- CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
- CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
- CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
- CLK_FLOAT = 0x10DE, // four channel RGBA float
+ CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+ CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+ CLK_FLOAT = 0x10DE, // four channel RGBA float
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
#endif
// valid only for integer return types
- CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT8 = 0x10D7,
CLK_SIGNED_INT16 = 0x10D8,
CLK_SIGNED_INT32 = 0x10D9,
CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@ typedef enum clk_channel_type {
CLK_UNSIGNED_INT32 = 0x10DC,
// CI SPI for CPU
- __CLK_UNORM_INT8888 , // four channel ARGB unorm8
- __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+ __CLK_UNORM_INT8888, // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
__CLK_VALID_IMAGE_TYPE_COUNT,
__CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
- __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
- // represent any image type
- __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
typedef enum clk_sampler_type {
- __CLK_ADDRESS_BASE = 0,
- CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
- __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
- CLK_ADDRESS_CLAMP_TO_EDGE |
- CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
- __CLK_ADDRESS_BITS = 3, // number of bits required to
- // represent address info
-
- __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
- CLK_NORMALIZED_COORDS_FALSE = 0,
- CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
- __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
- CLK_NORMALIZED_COORDS_TRUE,
- __CLK_NORMALIZED_BITS = 1, // number of bits required to
- // represent normalization
-
- __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
- __CLK_NORMALIZED_BITS,
- CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
- CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
- CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
- __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
- CLK_FILTER_ANISOTROPIC,
- __CLK_FILTER_BITS = 2, // number of bits required to
- // represent address info
-
- __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
- CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
- CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
- CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
- __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
- CLK_MIP_ANISOTROPIC,
- __CLK_MIP_BITS = 2,
-
- __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
- __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
- __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
- __CLK_ANISOTROPIC_RATIO_BITS = 5,
- __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
- (__CLK_ANISOTROPIC_RATIO_BITS-1)
+ __CLK_ADDRESS_MASK =
+ CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK =
+ CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK =
+ CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent address info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK =
+ (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
} clk_sampler_type;
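
// For example (illustrative), a sampler CLK_ADDRESS_CLAMP |
// CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_NEAREST packs to
// (1 << 0) | (1 << 3) | (0 << 4) == 0x9.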
// Memory synchronization
-#define CLK_LOCAL_MEM_FENCE (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
#endif // __CL_COMMON_DEFINES_H__
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 3d583060d1..bacc108c62 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
- if (!Modifier) {
- unsigned CCReg = MI->getOperand(OpNo+1).getReg();
- unsigned RegNo;
- switch (CCReg) {
- default: llvm_unreachable("Unknown CR register");
- case PPC::CR0: RegNo = 0; break;
- case PPC::CR1: RegNo = 1; break;
- case PPC::CR2: RegNo = 2; break;
- case PPC::CR3: RegNo = 3; break;
- case PPC::CR4: RegNo = 4; break;
- case PPC::CR5: RegNo = 5; break;
- case PPC::CR6: RegNo = 6; break;
- case PPC::CR7: RegNo = 7; break;
- }
-
- // Print the CR bit number. The Code is ((BI << 5) | BO) for a
- // BCC, but we must have the positive form here (BO == 12)
- unsigned BI = Code >> 5;
- assert((Code & 0xF) == 12 &&
- "BO in predicate bit must have the positive form");
-
- unsigned Value = 4*RegNo + BI;
- O << Value;
- return;
- }
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
case PPC::PRED_EQ: O << "eq"; return;
@@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc' or 'reg' as predicate op modifier!");
- // Don't print the register for 'always'.
- if (Code == PPC::PRED_ALWAYS) return;
printOperand(MI, OpNo+1, O);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index f24edf62ed..ec2657403e 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -30,13 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case PPC::fixup_ppc_toc:
case PPC::fixup_ppc_tlsreg:
case PPC::fixup_ppc_nofixup:
return Value;
- case PPC::fixup_ppc_lo14:
- case PPC::fixup_ppc_toc16_ds:
- return (Value & 0xffff) << 2;
case PPC::fixup_ppc_brcond14:
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
@@ -48,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
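  // ha16 rounds up when the sign bit of the low half is set; e.g.
  // (illustrative) Value 0x12348000 yields 0x1235, compensating for the
  // sign extension of the low 16 bits when lo16 and ha16 are recombined.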
case PPC::fixup_ppc_ha16:
return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
return Value & 0xffff;
+ case PPC::fixup_ppc_lo16_ds:
+ return Value & 0xfffc;
}
}
@@ -82,10 +79,7 @@ public:
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_lo16", 16, 16, 0 },
{ "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo14", 16, 14, 0 },
- { "fixup_ppc_toc", 0, 64, 0 },
- { "fixup_ppc_toc16", 16, 16, 0 },
- { "fixup_ppc_toc16_ds", 16, 14, 0 },
+ { "fixup_ppc_lo16_ds", 16, 14, 0 },
{ "fixup_ppc_tlsreg", 0, 0, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
deleted file mode 100644
index 9c975c089e..0000000000
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PPC target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCBASEINFO_H
-#define PPCBASEINFO_H
-
-#include "PPCMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
-} // end namespace llvm;
-
-#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 61868d446f..84e4175e63 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -133,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16;
+ break;
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO;
break;
@@ -144,35 +147,12 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
break;
}
break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case PPC::fixup_ppc_toc:
- Type = ELF::R_PPC64_TOC;
- break;
- case PPC::fixup_ppc_toc16:
+ case PPC::fixup_ppc_lo16_ds:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
- Type = ELF::R_PPC64_TPREL16_LO;
- break;
- case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
- Type = ELF::R_PPC64_DTPREL16_LO;
- break;
case MCSymbolRefExpr::VK_None:
- Type = ELF::R_PPC64_TOC16;
- break;
- case MCSymbolRefExpr::VK_PPC_TOC16_LO:
- Type = ELF::R_PPC64_TOC16_LO;
- break;
- case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
- Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ Type = ELF::R_PPC64_ADDR16_DS;
break;
- }
- break;
- case PPC::fixup_ppc_toc16_ds:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
Type = ELF::R_PPC64_TOC16_DS;
break;
@@ -253,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
switch ((unsigned)Fixup.getKind()) {
case PPC::fixup_ppc_ha16:
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
- case PPC::fixup_ppc_toc16_ds:
+ case PPC::fixup_ppc_lo16_ds:
RelocOffset += 2;
break;
default:
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 7917f7736e..86c44f57a5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
@@ -31,19 +33,9 @@ enum Fixups {
/// like 'lis'.
fixup_ppc_ha16,
- /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
- /// like 'std'.
- fixup_ppc_lo14,
-
- /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
- fixup_ppc_toc,
-
- /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
- fixup_ppc_toc16,
-
- /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
- /// implied 2 zero bits
- fixup_ppc_toc16_ds,
+ /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_lo16_ds,
/// fixup_ppc_tlsreg - Insert thread-pointer register number.
fixup_ppc_tlsreg,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index d048426d43..2223cd623c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -13,10 +13,10 @@
#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
Triple TT;
public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : STI(sti), TT(STI.getTargetTriple()) {
+ : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
}
~PPCMCCodeEmitter() {}
- bool is64BitMode() const {
- return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
- }
-
- bool isSVR4ABI() const {
- return TT.isMacOSX() == 0;
- }
-
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -81,12 +74,11 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
- // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the
- // following 'nop'.
+ // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF ||
- Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD)
+ if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+ Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
Size = 8;
// Output the constant in big endian byte order.
@@ -121,11 +113,11 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
(MCFixupKind)PPC::fixup_ppc_br24));
// For special TLS calls, add another fixup for the symbol. Apparently
- // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently
+ // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
// similar that TblGen will not generate a separate case for the latter
// two, so this is the only way to get the extra fixup generated.
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) {
+ if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
const MCOperand &MO2 = MI.getOperand(OpNo+1);
Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
@@ -178,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
return RegBits;
}
@@ -199,13 +187,9 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
- // Add a fixup for the branch target.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16_ds));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo14));
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16_ds));
return RegBits;
}
@@ -220,7 +204,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_tlsreg));
- return getPPCRegisterNumbering(PPC::X13);
+ return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
}
unsigned PPCMCCodeEmitter::
@@ -231,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
MI.getOpcode() == PPC::MFOCRF ||
MI.getOpcode() == PPC::MTCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
@@ -243,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 4a420929d0..38a7420d97 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a1434..d84eb9c6aa 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
switch (Opcode) {
- default: llvm_unreachable("Unknown PPC branch opcode!");
case PPC::PRED_EQ: return PPC::PRED_NE;
case PPC::PRED_NE: return PPC::PRED_EQ;
case PPC::PRED_LT: return PPC::PRED_GE;
@@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_NU: return PPC::PRED_UN;
case PPC::PRED_UN: return PPC::PRED_NU;
}
+ llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 972e13852e..ad2b018128 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -17,11 +17,14 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
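/// For example, PRED_LT = (0 << 5) | 12: condition bit 0 (the LT bit of
/// the CR field) with BO encoding 12, i.e. "branch if the condition bit
/// is set".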
enum Predicate {
- PRED_ALWAYS = (0 << 5) | 20,
PRED_LT = (0 << 5) | 12,
PRED_LE = (1 << 5) | 4,
PRED_EQ = (2 << 5) | 12,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f71979f245..446b6854fb 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
-#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 992913602a..a1ea2297bf 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -59,8 +59,18 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
@@ -71,15 +81,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
//
// CMPB p6, p6x, p7 cmpb
// DFP p6, p6x, p7 decimal floating-point instructions
-// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
-// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz
// FRE p5 through p7 fre (vs. fres, available since p3)
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
-// LDBRX p7 load with byte reversal
-// LFIWAX p6, p6x, p7 lfiwax
-// LFIWZX p7 lfiwzx
// POPCNTB p5 through p7 popcntb and related instructions
-// POPCNTD p7 popcntd and related instructions
// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
// VSX p7 vector-scalar instruction set
@@ -128,16 +132,18 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, FeatureISEL,
- Feature64Bit
- /*, Feature64BitRegs */]>;
-def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, FeatureISEL,
- Feature64Bit /*, Feature64BitRegs */,
- FeatureQPX]>;
+def : Processor<"a2", PPCA2Itineraries,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : Processor<"a2q", PPCA2Itineraries,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX]>;
def : Processor<"pwr3", G5Itineraries,
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
FeatureSTFIWX, Feature64Bit]>;
@@ -149,18 +155,23 @@ def : Processor<"pwr5", G5Itineraries,
FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr5x", G5Itineraries,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+ FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
+ Feature64Bit]>;
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureLFIWAX, FeatureFPRND, Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"pwr6x", G5Itineraries,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureLFIWAX, FeatureFPRND, FeatureFPCVT,
+ FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index eae9b7b7fb..74cc1bb762 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -370,7 +370,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here.
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
// FIXME: We would like an efficient form for this, so we don't have to do
// a lot of extra uniquing.
.addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
@@ -458,11 +458,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = LDtocL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to LDrs, which is a form of LD with the offset
- // specified by a SymbolLo. If the global address is external, has
+ // Change the opcode to LD. If the global address is external, has
// common linkage, or is a jump table address, then reference the
// associated TOC entry. Otherwise reference the symbol directly.
- TmpInst.setOpcode(PPC::LDrs);
+ TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
"Invalid operand for LDtocL!");
@@ -496,10 +495,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = ADDItocL %Xs, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to ADDI8L. If the global address is external, then
+ // Change the opcode to ADDI8. If the global address is external, then
// generate a TOC entry and reference that. Otherwise reference the
// symbol directly.
- TmpInst.setOpcode(PPC::ADDI8L);
+ TmpInst.setOpcode(PPC::ADDI8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = 0;
@@ -548,9 +547,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to LDrs, which is a form of LD with the offset
- // specified by a SymbolLo.
- TmpInst.setOpcode(PPC::LDrs);
+ // Change the opcode to LD.
+ TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
@@ -579,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItlsgdL: {
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
- // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
@@ -587,7 +585,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
@@ -595,7 +593,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::GETtlsADDR: {
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
- // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd)
+ // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
@@ -608,7 +606,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -631,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDItlsldL: {
// Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
- // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
@@ -639,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsLD));
@@ -647,7 +645,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::GETtlsldADDR: {
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
- // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld)
+ // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
@@ -660,7 +658,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -683,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::ADDIdtprelL: {
// Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
- // Into: %Xd = ADDI8L %Xs, sym@dtprel@l
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
@@ -691,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymDtprel));
@@ -911,18 +909,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
// mflr r0
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
- // FIXME: MCize this.
- OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName()));
+ // bcl 20, 31, AnonSymbol
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCL).addExpr(Anon));
OutStreamer.EmitLabel(AnonSymbol);
// mflr r11
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
const MCExpr *Sub =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
- MCSymbolRefExpr::Create(AnonSymbol, OutContext),
- OutContext);
+ Anon, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
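
The LDtocL/ADDItocL hunks above repeat one rule: go through the TOC entry when the symbol cannot be referenced directly. A condensed, hedged restatement with a hypothetical helper name (referenceViaTOC is not part of this patch; the GlobalValue predicates are real):

    // External symbols, common-linkage symbols, and jump-table addresses are
    // referenced via their TOC entry; anything else is referenced directly.
    // (Constant-pool handling is omitted from this sketch.)
    static bool referenceViaTOC(const MachineOperand &MO) {
      if (MO.isJTI())
        return true;
      const GlobalValue *GV = MO.getGlobal();
      return GV->isDeclaration() || GV->hasCommonLinkage();
    }
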
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index b98cc489f6..81a54d7015 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
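
The corrected lis/ori handling above depends on the immediates' extension rules: lis takes a signed 16-bit immediate shifted left 16, while ori zero-extends its immediate, which is why the fix drops the (short) cast and looks up the lis definition through ori's source operand (operand 1) rather than its destination (operand 0). A self-contained sketch of the materialization (hypothetical helper, standard PowerPC semantics):

    #include <cstdint>

    // lis rD, HI      =>  rD = int64_t(HI) << 16   (HI sign-extended)
    // ori rD, rS, LO  =>  rD = rS | uint64_t(LO)   (LO zero-extended)
    int64_t materializeLisOri(int16_t hi, uint16_t lo) {
      return (int64_t(hi) << 16) | uint64_t(lo);
    }
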
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index caeb1796f7..c8a29a3d2c 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -136,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
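
These CalleeSavedRegs sets plausibly back the getNoPreservedMask() regmask used by the EH_SjLj setup later in this patch; that linkage is not visible in this diff, so treat it as an assumption. Attaching such a mask tells the register allocator the pseudo-call preserves essentially nothing:

    // From emitEHSjLjSetJmp below: the bcl is marked as clobbering all
    // registers outside the no-preserved set, so no value is assumed to
    // stay live across it.
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
    MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
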
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index d68bfd12e4..6478718513 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -142,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -260,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
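
get_crbitm_encoding above turns a condition-register field into a one-hot mask over the eight 4-bit CR fields, now keyed off the register's encoding value. A self-contained sketch of the arithmetic (hypothetical helper):

    // CR0 (encoding 0) maps to bit 7 (0x80); CR7 (encoding 7) maps to
    // bit 0 (0x01) -- the field-mask operand of mtcrf/mfocrf.
    unsigned crFieldMask(unsigned Encoding) { // Encoding in [0, 7]
      return 0x80u >> Encoding;
    }
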
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0a396e6693..3244b904ee 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -115,7 +116,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(I->first);
+ unsigned RegNo = TRI->getEncodingValue(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
@@ -131,7 +132,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const MachineOperand &MO = Ret.getOperand(I);
if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
continue;
- unsigned RegNo = getPPCRegisterNumbering(MO.getReg());
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
UsedRegMask &= ~(1 << (31-RegNo));
}
}
@@ -188,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -223,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -241,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -250,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -281,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -311,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -780,7 +828,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
@@ -822,30 +870,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
-
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -853,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -917,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -936,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -970,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
- getPPCRegisterNumbering(MinG8R));
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
@@ -1031,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF))
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
}
bool
@@ -1068,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// save slot via GPR12 (available in the prolog for 32- and 64-bit).
if (Subtarget.isPPC64()) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
.addReg(PPC::X12,
getKillRegState(true))
.addImm(8)
@@ -1109,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (isPPC64) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
.addImm(8)
.addReg(PPC::X1));
RestoreOp = PPC::MTCRF8;
@@ -1125,15 +1198,15 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
if (CR3Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
if (CR4Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(true)));
}
void PPCFrameLowering::
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d09e47fafd..6f5f9368c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -32,7 +32,9 @@ public:
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -41,10 +43,13 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -144,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
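
The scavenging rework above splits frame layout into an estimating pass and a committing pass. A condensed sketch of the decision addScavengingSpillSlot implements, using the names from the code above:

    // Estimate the frame size without committing it to MachineFrameInfo,
    // then reserve an emergency slot when a spill offset may not fit a
    // 16-bit D-form displacement; CR/VRSAVE spills can need a second
    // register, hence a second slot.
    unsigned StackSize = determineFrameLayout(MF, /*UpdateMF=*/false,
                                              /*UseEstimate=*/true);
    bool NeedSlot = MFI->hasVarSizedObjects() || spillsCR(MF) ||
                    spillsVRSAVE(MF) || hasNonRISpills(MF) ||
                    (hasSpills(MF) && !isInt<16>(StackSize));
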
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 6ed1fb9e6a..4bf1e33964 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) {
}
// Do not allow MTCTR and BCTRL to be in the same dispatch group.
- if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ if (HasCTRSet && Opcode == PPC::BCTRL)
return NoopHazard;
// If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 17bea8a6a6..95efc11b53 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -120,10 +120,10 @@ namespace {
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
- /// immediate field. Because preinc imms have already been validated, just
- /// accept it.
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
@@ -132,18 +132,6 @@ namespace {
return false;
}
- /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
- /// index field. Because preinc imms have already been validated, just
- /// accept it.
- bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
- N.getOpcode() == ISD::TargetGlobalAddress)
- return false;
-
- Out = N;
- return true;
- }
-
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which is preferred.
@@ -164,6 +152,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -1050,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
SDValue Offset = LD->getOffset();
- if (isa<ConstantSDNode>(Offset) ||
+ if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
@@ -1117,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[] = { Offset, Base, Chain };
+ SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
@@ -1483,8 +1477,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
default: continue;
case PPC::ADDI8:
- case PPC::ADDI8L:
- case PPC::ADDIL:
+ case PPC::ADDI:
// In some cases (such as TLS) the relocation information
// is already in place on the operand, so copying the operand
// is sufficient.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13cb358fc0..2cceb3d312 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -154,18 +158,45 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
@@ -208,6 +239,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj
+ // exception handling; they provide a lightweight setjmp/longjmp
+ // replacement for continuations, user-level threading, and the like.
+ // No other SjLj exception interfaces are implemented, so do not build
+ // your own exception handling on top of them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -287,15 +326,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -508,7 +560,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- BenefitFromCodePlacementOpt = true;
}
}
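
The "ties to even" caveat above can be checked concretely. A standalone example (not part of this patch) showing why frin, which rounds halfway cases away from zero, can replace nearbyint only under fast-math:

    #include <cassert>
    #include <cmath>

    int main() {
      // Default rounding mode: nearbyint rounds to nearest, ties to even.
      assert(std::nearbyint(0.5) == 0.0); // the tie rounds to even, i.e. 0
      assert(std::nearbyint(1.5) == 2.0);
      // frin would give 1.0 and 2.0 respectively (ties away from zero); the
      // first case differs, hence the UnsafeFPMath guard above.
      return 0;
    }
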
@@ -554,16 +605,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -573,10 +621,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
@@ -1028,7 +1073,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1077,7 +1122,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1140,7 +1185,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1178,15 +1223,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
@@ -1194,7 +1243,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
@@ -1205,6 +1272,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -3096,7 +3167,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3229,8 +3300,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3369,7 +3443,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -3380,9 +3454,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
@@ -4555,6 +4629,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
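
The EH_SJLJ nodes created above are expanded by custom inserters later in this patch, which read and write a small, deliberately non-libc jmp_buf. A hedged sketch of that layout, reconstructed from the LabelOffset/SPOffset/TOCOffset constants in those inserters (enum name hypothetical):

    // One pointer-sized word per slot. Clang stores slots 0 and 2 before
    // the intrinsic runs; the inserters fill slots 1 and 3.
    enum JmpBufSlot {
      FrameAddress = 0, // saved by the front end
      ResumeIP     = 1, // LabelOffset = 1 * PVT.getStoreSize()
      StackPointer = 2, // SPOffset    = 2 * PVT.getStoreSize()
      TOCPointer   = 3  // TOCOffset   = 3 * PVT.getStoreSize() (R2, 64-bit SVR4)
    };
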
@@ -4642,37 +4731,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
- SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
- if (Op.getValueType() == MVT::i32)
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
@@ -4686,6 +4810,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4719,44 +4844,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
+
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
- "Unhandled SINT_TO_FP type in custom expander!");
+ "Unhandled INT_TO_FP type in custom expander!");
// With LFIWAX/FPCVT, the i32 input can be stored and loaded directly
// (lfiwax/lfiwzx below). Otherwise this is generated only in 64-bit mode:
// sign extend the input value into a 64-bit register with extsw, store the
// WHOLE 64-bit value into the stack, then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
- // STD the extended value into the stack slot.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 8, 8);
- SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
- SDValue Store =
- DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
- Ops, 4, MVT::i64, MMO);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5551,11 +5701,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
@@ -5609,50 +5763,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
- // This sequence changes FPSCR to do round-to-zero, adds the two halves
- // of the long double, and puts FPSCR back the way it was. We do not
- // actually model FPSCR.
- std::vector<EVT> NodeTys;
- SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
- NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
- Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
- MFFSreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(31, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(30, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = Lo;
- Ops[1] = Hi;
- Ops[2] = InFlag;
- Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
- FPreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64);
- Ops[0] = DAG.getConstant(1, MVT::i32);
- Ops[1] = MFFSreg;
- Ops[2] = FPreg;
- Ops[3] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
- FPreg = Result.getValue(0);
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
@@ -5744,7 +5856,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5863,9 +5975,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare the resume IP in a register.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
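+ // (PPC cannot branch indirectly through a GPR, hence the move to CTR
+ // and the bctr above.)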
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -5883,24 +6224,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- // The SelectPred is ((BI << 5) | BO) for a BCC
- unsigned BO = SelectPred & 0xF;
- assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
- unsigned TrueOpNo, FalseOpNo;
- if (BO == 12) {
- TrueOpNo = 2;
- FalseOpNo = 3;
- } else {
- TrueOpNo = 3;
- FalseOpNo = 2;
- SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
}
BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(TrueOpNo).getReg())
- .addReg(MI->getOperand(FalseOpNo).getReg())
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
+ .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
+ .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6133,7 +6474,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6236,6 +6577,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
.addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
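+ // (FPSCR bits 30-31 form the RN field; RN = 0b01 means round toward
+ // zero, hence set bit 31 and clear bit 30.)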
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6321,8 +6731,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
- N->getOperand(2), N->getOperand(3));
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
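+ // Using a memory-intrinsic node keeps the store's MachineMemOperand
+ // attached to the STFIWX node, preserving its memory/alias information.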
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -6332,7 +6749,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16)) {
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
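+ // (The i64 case lowers to stdbrx, which, like ldbrx, is only present
+ // when the subtarget reports hasLDBRX().)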
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32 bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -6353,7 +6773,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx (or ldbrx for i64 when available).
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@@ -6364,8 +6787,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
- DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
- LD->getMemoryVT(), LD->getMemOperand());
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -6622,6 +7046,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
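+ // ('b' is the base-register constraint: r0 reads as zero when used as
+ // a base, so classes excluding R0/X0 are required.)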
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6806,13 +7233,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
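+
+ // (PPC::FP / PPC::FP8 act as frame-register placeholders; prolog/epilog
+ // insertion rewrites them to the real frame register, r31 or r1, once
+ // the final frame layout is known.)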
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6851,6 +7281,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
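+ // For example, an i32 load from an address only known to be byte-aligned
+ // can then be selected as a single lwz rather than being expanded into
+ // byte loads and shifts.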
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3931384d89..6690899e5d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -35,14 +36,18 @@ namespace llvm {
/// was temporarily in the f64 operand.
FCFID,
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ, FCTIWZ,
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
@@ -90,17 +95,10 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
- /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
- /// registers.
- EXTSW_32,
-
/// CALL - A direct function call.
- /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
- CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
-
- /// NOP - Special NOP which follows 64-bit SVR4 calls.
- NOP,
+ CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -108,7 +106,7 @@ namespace llvm {
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
- BCTRL_Darwin, BCTRL_SVR4,
+ BCTRL,
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -119,6 +117,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -138,26 +142,13 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
- // The following 5 instructions are used only as part of the
- // long double-to-int conversion sequence.
-
- /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
- /// register.
- MFFS,
-
- /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
- MTFSB0,
-
- /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
- MTFSB1,
-
- /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
- /// rounding towards zero. It has flags added so it won't move past the
- /// FPSCR-setting instructions.
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
FADDRTZ,
- /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
- MTFSF,
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
/// LARX = This corresponds to PPC l{w|d}arx instruction: load and
/// reserve indexed. This is used to implement atomic operations.
@@ -243,14 +234,11 @@ namespace llvm {
/// optimizations due to constant folding.
VADD_SPLAT,
- /// STD_32 - This is the STD instruction for use with "32-bit" registers.
- STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
- STBRX,
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
@@ -258,6 +246,20 @@ namespace llvm {
/// or i32.
LBRX,
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
/// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
/// produces an ADDIS8 instruction that adds the TOC base register to
/// sym@toc@ha.
@@ -321,6 +323,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -395,6 +398,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -449,6 +458,10 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Returns true if unaligned memory access is allowed for the given type;
+ /// if Fast is non-null, *Fast is set to whether such accesses are fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -494,7 +507,7 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -604,6 +617,9 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 01201304f7..fa5b65f0ba 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,12 +30,7 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
-}
-def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64
- let PrintMethod = "printMemRegImm";
- let EncoderMethod = "getMemRIXEncoding";
- let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+ let MIOperandInfo = (ops i64imm:$imm);
}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
@@ -71,133 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
let Defs = [LR8] in
def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
- }
- let Uses = [CTR8, RM] in {
- def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
-// ELF 64 ABI Calls = Darwin ABI Calls
-// Used to define BL8_ELF and BLA8_ELF
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func, tlsgd:$sym),
"bl $func($sym)\n\tnop", BrB, []>;
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func, tlsgd:$sym),
"bl $func($sym)\n\tnop", BrB, []>;
- def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
-
- let isCodeGenOnly = 1 in
- def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
- [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
+ [(PPCcall_nop (i64 imm:$func))]>;
}
- let Uses = [X11, CTR8, RM] in {
- def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
}
}
// Calls
-def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
- (BL8_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
- (BL8_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_NOP_ELF tglobaladdr:$dst)>;
-
-def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
- (BL8_ELF texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
- (BL8_NOP_ELF texternalsym:$dst)>;
-
-def : Pat<(PPCnop),
- (NOP)>;
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set G8RC:$dst,
- (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
- [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
}
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", LdStLDARX,
- [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
"stdcx. $rS, $dst", LdStSTDCX,
- [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -216,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
- isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
- let isReturn = 1 in {
- def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
- }
-
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
-}
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
@@ -242,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+}
+
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -251,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
- let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
- }
-}
-// 64-but CR instructions
+// 64-bit CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let isCodeGenOnly = 1 in
def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
"#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -273,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
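+
+// Both pseudos carry usesCustomInserter: after selection they are expanded
+// by emitEHSjLjSetJmp / emitEHSjLjLongJmp in PPCISelLowering.cpp.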
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -281,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -298,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
- [(set G8RC:$result,
- (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
@@ -321,131 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
- [(set G8RC:$rD, immSExt16:$imm)]>;
+ [(set i64:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
- [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
// Logical ops with immediate.
def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
"add $rT, $rA, $rB@tls", IntSimple,
- [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+ [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+ [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"neg $rT, $rA", IntSimple,
- [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+ [(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
}
def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhd $rT, $rA, $rB", IntMulHW,
- [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhdu $rT, $rA, $rB", IntMulHWU,
- [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
"cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
@@ -458,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"sld $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srd $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
let Defs = [CARRY] in {
def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srad $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
-/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
"sradi $rA, $rS, $SH", IntRotateDI,
- [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
"cntlzd $rA, $rS", IntGeneral,
- [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32 bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divd $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divdu $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulld $rT, $rA, $rB", IntMulHD,
- [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let isCommutable = 1 in {
@@ -536,7 +516,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -551,94 +531,96 @@ def ISEL8 : AForm_4<31, 15,
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -648,31 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
-def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
- "ld $rD, $src", LdStLD,
- []>, isPPC64;
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
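+// LD is DS-form, so its displacement is encoded in multiples of 4; the
+// aligned4load pattern restricts selection to suitably aligned loads, and
+// other i64 loads fall back to the X-form LDX below.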
// The following three definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtoc",
- [(set G8RC:$rD,
- (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocJTI",
- [(set G8RC:$rD,
- (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocCPT",
- [(set G8RC:$rD,
- (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
- [(PPCload_toc G8RC:$reg)]>, isPPC64;
+ [(PPCload_toc i64:$reg)]>, isPPC64;
let RST = 2, DS = 10, RA = 1 in
def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
@@ -681,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
- [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
-
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -702,188 +684,167 @@ def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
// Support for medium and large code model.
-def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
"#ADDIStocHA",
- [(set G8RC:$rD,
- (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
isPPC64;
-def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
"#LDtocL",
- [(set G8RC:$rD,
- (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
"#ADDItocL",
- [(set G8RC:$rD,
- (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
"#ADDISgotTprelHA",
- [(set G8RC:$rD,
- (PPCaddisGotTprelHA G8RC:$reg,
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg),
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
"#LDgotTprelL",
- [(set G8RC:$rD,
- (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>,
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
-def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
- (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
"#ADDIStlsgdHA",
- [(set G8RC:$rD,
- (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
"#ADDItlsgdL",
- [(set G8RC:$rD,
- (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
"#GETtlsADDR",
- [(set G8RC:$rD,
- (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
"#ADDIStlsldHA",
- [(set G8RC:$rD,
- (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
"#ADDItlsldL",
- [(set G8RC:$rD,
- (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
"#GETtlsldADDR",
- [(set G8RC:$rD,
- (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>,
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
"#ADDISdtprelHA",
- [(set G8RC:$rD,
- (PPCaddisDtprelHA G8RC:$reg,
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
"#ADDIdtprelL",
- [(set G8RC:$rD,
- (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>,
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
- [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
-let PPC970_Unit = 2 in {
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
-
-// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
-def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
- "std $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
-def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
- "stdx $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
- PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
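+// For example, the memri operand of STBU8 above bundles offset and base into
+// one complex operand, while pre_truncsti8 below supplies them separately as
+// $ptrreg and $ptroff; the patterns reassemble them. A rough sketch of the
+// resulting instruction's semantics:
+//   stbu rS, d(rA)   ; mem[rA+d] = low byte of rS, then rA = rA+d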
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
//===----------------------------------------------------------------------===//
@@ -894,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
"fcfid $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
"fctidz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfidu $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfids $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfidus $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiduz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwuz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
@@ -906,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
//
// Extensions and truncates to/from 32-bit regs.
-def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
0, 32)>;
-def : Pat<(i64 (anyext GPRC:$in)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
-def : Pat<(i32 (trunc G8RC:$in)),
- (EXTRACT_SUBREG G8RC:$in, sub_32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -939,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src),
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
// amounts.
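// (Roughly: the 64-bit hardware shifts read a 7-bit amount, e.g. SLD uses the
// low 7 bits of rB and yields zero for amounts 64-127, whereas the generic
// shl/srl/sra nodes matched here are only defined for amounts below 64.)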
-def : Pat<(sra G8RC:$rS, GPRC:$rB),
- (SRAD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(srl G8RC:$rS, GPRC:$rB),
- (SRD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(shl G8RC:$rS, GPRC:$rB),
- (SLD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
// SHL/SRL
-def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
- (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
-def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
// ROTL
-def : Pat<(rotl G8RC:$in, GPRC:$sh),
- (RLDCL G8RC:$in, GPRC:$sh, 0)>;
-def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, imm:$imm, 0)>;
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
@@ -967,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
- (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
- (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS8 G8RC:$in, tconstpool:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS8 G8RC:$in, tjumptable:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS8 G8RC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
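+// (LWA/LD/STD are DS-form: the displacement is encoded in 14 bits and
+// implicitly scaled by 4, so only multiples of 4 are representable. The
+// X-form LWAX/LDX/STDX compute the address from two registers and carry
+// no such restriction.)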
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 0cf28ae4b5..fff91df418 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -161,27 +161,90 @@ def vecspltisw : PatLeaf<(build_vector), [{
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
-// VA1a_Int - A VAForm_1a intrinsic definition.
+// VA1a_Int - A VAForm_1a intrinsic definition of generic type.
class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
: VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
[(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
-// VX1_Int - A VXForm_1 intrinsic definition.
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
+
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
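+// For example, vperm is defined below as
+//   VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, v4i32, v4i32, v16i8>
+// because its result and two data inputs are v4i32 while the permute
+// control vector $vC is v16i8.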
+
+// VX1_Int - A VXForm_1 intrinsic definition of generic type.
class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
: VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
[(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
-// VX2_Int - A VXForm_2 intrinsic definition.
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition of generic type.
class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
: VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
[(set VRRC:$vD, (IntID VRRC:$vB))]>;
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
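+// For example, vupkhsb is defined below as VX2_Int_Ty2<..., v8i16, v16i8>:
+// it sign-extends the high eight bytes of a v16i8 input into a v8i16 result.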
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -213,74 +276,79 @@ def DSTST64 : DSS_Form<374, (outs),
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
"mfvscr $vD", LdStStore,
- [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
"mtvscr $vB", LdStLoad,
- [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
"lvebx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
"lvehx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
"lvewx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
"lvx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
"lvxl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
"lvsl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
"lvsr $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
"stvebx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
"stvehx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
"stvewx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
"stvx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
"stvxl $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
- (fneg VRRC:$vB))))]>;
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
-def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
-def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
-def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
-def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
-def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
@@ -291,25 +359,25 @@ def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddfp $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddubm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
-def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
-def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
-def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
-def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
-def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
-def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
@@ -322,20 +390,20 @@ def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfux $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctsxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctuxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
// to floating-point (sint_to_fp/uint_to_fp) conversions.
@@ -343,49 +411,49 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
let VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfsx $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
"vctuxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfux $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
"vctsxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
-def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
-def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
-
-def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
-def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
-def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
-def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
-def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
-def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
-
-def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
-def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
-def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
-def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
-def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
-def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
-def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
-def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
-def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
-def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
-def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
-def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
-def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
-def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
@@ -394,7 +462,7 @@ def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
[(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
@@ -402,55 +470,74 @@ def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
-def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
-def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
-def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
-def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
-def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
-def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
-
-def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
-def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
-def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
-def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
-def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
-def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
-def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
-def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
-def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
-def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
-def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
-def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
-def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
-def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
-def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubfp $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsububm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
-def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
-def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
-def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
-def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
-def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
-def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
-def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
-def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
-def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
-def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
@@ -463,15 +550,16 @@ def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vxor $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
-def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
-def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
-def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
-def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
-def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
-def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
-def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
-def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
+
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
@@ -486,60 +574,74 @@ def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
[(set VRRC:$vD,
(vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
-def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
-def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
-def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
-def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
-def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
-def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
-def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
-def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisb $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltish $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisw $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
-def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
-def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
-def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
-def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
-def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD,
(vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD,
(vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
// Vector Unpack.
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
-def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
-def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
-def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
-def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
-def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
@@ -578,13 +680,14 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+let isCodeGenOnly = 1 in
def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
[(set VRRC:$vD, (v4i32 immAllZerosV))]>;
let IMM=-1 in {
def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
"vspltisw $vD, -1", VecFP,
- [(set VRRC:$vD, (v4i32 immAllOnesV))]>;
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
} // VALU Operations.
@@ -597,31 +700,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
// * 32-bit
-def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
// * 64-bit
-def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
// Stores.
-def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
- (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
// Bit conversions.
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -643,93 +746,96 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
-def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
- (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
-def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUWUM VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
// Match vmrg*(x,x)
-def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLW VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR $vA, $vA)>;
def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
- (VNOR VRRC:$A, VRRC:$B)>;
+ (VNOR $A, $B)>;
def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
- (VANDC VRRC:$A, VRRC:$B)>;
+ (VANDC $A, $B)>;
-def : Pat<(fmul VRRC:$vA, VRRC:$vB),
- (VMADDFP VRRC:$vA, VRRC:$vB,
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
(v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
// Fused multiply add and multiply sub for packed float. These are represented
// separately from the real instructions above, for operations that must have
// the additional precision, such as Newton-Raphson (used by divide, sqrt).
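// One Newton-Raphson refinement step for a reciprocal estimate x0 ~ 1/b maps
// directly onto this pair (a sketch; vector element types elided):
//   e  = VNMSUBFP b, x0, 1.0    ; e  = 1.0 - b*x0
//   x1 = VMADDFP  x0, e, x0     ; x1 = x0 + x0*e  (closer to 1/b)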
-def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
-def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
- (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+ (VPERM $vA, $vB, $vC)>;
// Vector shifts
-def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
-def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
- (VCTSXS_0 VRRC:$vA)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
- (VCTUXS_0 VRRC:$vA)>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
- (VCFSX_0 VRRC:$vA)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
- (VCFUX_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
// Floating-point rounding
-def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))),
- (VRFIM VRRC:$vA)>;
-def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))),
- (VRFIP VRRC:$vA)>;
-def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
- (VRFIZ VRRC:$vA)>;
-def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
- (VRFIN VRRC:$vA)>;
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21..400b7e367b 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- string cstr, InstrItinClass itin, list<dag>pattern>
+ InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
- let Constraints = cstr;
let Inst{6} = 0;
let Inst{7-14} = FM;
@@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
bits<5> RT;
bits<5> RA;
bits<5> RB;
- bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
- bits<3> CR;
+ bits<5> COND;
let Pattern = pattern;
let Inst{6-10} = RT;
let Inst{11-15} = RA;
let Inst{16-20} = RB;
- let Inst{21-23} = CR;
- let Inst{24-25} = BIBO{6-5};
+ let Inst{21-25} = COND;
let Inst{26-30} = xo;
let Inst{31} = 0;
}
@@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
let PPC64 = 0;
let Pattern = pattern;
let Inst{31-0} = 0;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a0517a80a9..69c54ed084 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,11 +33,6 @@
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::LD:
case PPC::LWZ:
case PPC::LFS:
case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::LVX:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
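+    // Illustratively, a reload built via addFrameReference looks like
+    //   %reg = LWZ 0, <fi#N>
+    // so operand 1 is the zero immediate and operand 2 the frame index.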
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::STD:
case PPC::STW:
case PPC::STFS:
case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::STVX:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use X11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(PPC::X11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
@@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
-      // FIXME: We need a scratch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
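For context, a minimal sketch of a hypothetical consumer of the bookkeeping above; it is not part of this patch, and the getter names are assumptions inferred from the setters used in this hunk (setHasSpills, setSpillsCR, setSpillsVRSAVE, setHasNonRISpills):

    // Hypothetical: a frame-lowering pass could use these flags to decide
    // whether an emergency scavenging slot must be reserved, since CR and
    // VRSAVE spills are expanded late and may need a scratch GPR.
    static bool needsScavengingSlot(const MachineFunction &MF) {
      const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      return FI->hasSpills() &&          // some spill code was emitted
             (FI->isCRSpilled() ||       // SPILL_CR needs a GPR on expansion
              FI->isVRSAVESpilled() ||   // likewise SPILL_VRSAVE
              FI->hasNonRISpills());     // r+r-only spills need an address reg
    }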
@@ -616,25 +577,17 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- DestReg), FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
- FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::X11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
@@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scratch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
- case PPC::BL8_NOP_ELF:
- case PPC::BLA8_NOP_ELF:
+ case PPC::BL8_NOP:
+ case PPC::BLA8_NOP:
return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea43..635e3480b0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 460e94342d..067f5aacfa 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,32 +57,33 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
-def SDT_PPCnop : SDTypeProfile<0, 0, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
-def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-// This sequence is used for long double->int conversions. It changes the
-// bits in the FPSCR which is not modelled.
-def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
- [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
- [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
- SDTCisVT<3, f64>]>,
- [SDNPInGlue]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -113,10 +118,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
-def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
-def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
- [SDNPHasChain, SDNPMayStore]>;
-
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -124,16 +125,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -144,13 +141,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -158,6 +151,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -278,6 +279,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries then the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
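As an illustration of how these fragments pair up (a sketch, not part of this hunk; LD, LDX and the ixaddr/xaddr complex patterns are defined in the 64-bit instruction file), the DS-form load is selected only when 4-byte alignment is provable, with the X-form as the fallback:

    // Sketch: alignment-gated selection between the DS-form and X-form loads.
    def : Pat<(i64 (aligned4load ixaddr:$src)), (LD ixaddr:$src)>;
    def : Pat<(i64 (unaligned4load xaddr:$src)), (LDX xaddr:$src)>;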
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -314,9 +347,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -344,26 +374,37 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
-// that doesn't matter.
-def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
- (ops (i32 20), (i32 zero_reg))> {
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
}
// Define PowerPC specific addressing mode.
@@ -372,9 +413,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -401,17 +445,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
- [(set GPRC:$result,
- (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
- def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+ GPRC_NOR0:$T, GPRC_NOR0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+ G8RC_NOX0:$T, G8RC_NOX0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
@@ -438,10 +487,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
- "b${p:cc}lr ${p:reg}", BrB,
- [(retflag)]>;
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ [(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
}
@@ -473,46 +521,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
- }
- let Uses = [CTR, RM] in {
- def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+// The direct BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
}
}
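The BO=20/BI=31 encoding makes this an unconditional branch that still writes LR, which is the classic get-PC idiom; the setjmp sequence presumably uses it along these lines (register choice is illustrative):

    bcl   20, 31, $+4     # "branch" to the next instruction, recording LR
    mflr  r30             # r30 now holds the address right after the bcl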
-// SVR4 ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB,
- [(PPCcall_SVR4 (i32 imm:$func))]>;
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
- def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
}
}
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -531,6 +562,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
[]>;
+let isCodeGenOnly = 1 in {
+
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
@@ -544,6 +577,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
+}
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
@@ -551,6 +585,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -586,93 +636,90 @@ let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set GPRC:$dst,
- (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
- [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
- [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
- [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
- [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
- [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
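The atomic pseudos above (defined under usesCustomInserter) are expanded into a load-reserve/store-conditional retry loop built from this pair; schematically, ATOMIC_LOAD_ADD_I32 becomes something like:

    loop:
      lwarx   r5, 0, r3     # load word and take a reservation
      add     r5, r5, r4    # apply the operation
      stwcx.  r5, 0, r3     # store conditionally; sets CR0[EQ] on success
      bne-    loop          # reservation lost, retry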
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
@@ -686,94 +733,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load iaddr:$src))]>;
+ [(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
- [(set F4RC:$rD, (load iaddr:$src))]>;
+ [(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
- [(set F8RC:$rD, (load iaddr:$src))]>;
+ [(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -783,32 +830,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load xaddr:$src))]>;
+ [(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
- [(set F4RC:$frD, (load xaddr:$src))]>;
+ [(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
- [(set F8RC:$frD, (load xaddr:$src))]>;
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
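These two loads exist so that an i32-to-floating-point conversion can stay in the FP unit instead of bouncing through a GPR; schematically (the actual lowering is done in the C++ ISel code), an i32-to-f64 conversion becomes:

    lfiwax  f0, 0, r3     # load the i32 sign-extended into an FPR
    fcfid   f1, f0        # convert the 64-bit integer value to double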
//===----------------------------------------------------------------------===//
@@ -819,137 +873,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(store GPRC:$rS, iaddr:$src)]>;
+ [(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
- [(store F4RC:$rS, iaddr:$dst)]>;
+ [(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
- [(store F8RC:$rS, iaddr:$dst)]>;
+ [(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(store GPRC:$rS, xaddr:$dst)]>,
- PPC970_DGroup_Cracked;
-
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ [(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
- [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
- [(store F4RC:$frS, xaddr:$dst)]>;
+ [(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
- [(store F8RC:$frS, xaddr:$dst)]>;
+ [(store f64:$frS, xaddr:$dst)]>;
}
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -959,68 +1004,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
PPC970_DGroup_Cracked;
def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
- [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+ [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
- [(set GPRC:$rD, immSExt16:$imm)]>;
+ [(set i32:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
"lis $rD, $imm", IntSimple,
- [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
@@ -1033,38 +1076,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"srw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
let Defs = [CARRY] in {
def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"sraw $rA, $rS, $rB", IntShift,
- [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
}
@@ -1072,17 +1115,17 @@ let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CARRY] in {
def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
"srawi $rA, $rS, $SH", IntShift,
- [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
}
def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
- [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+ [(set i32:$rA, (ctlz i32:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
"cmpw $crD, $rA, $rB", IntCompare>;
@@ -1100,16 +1143,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
let Uses = [RM] in {
def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
"fctiwz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
"frsp $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ // The frin -> nearbyint mapping is valid only in fast-math mode.
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
+
+ // These pseudos expand to rint but also set FE_INEXACT when the result does
+ // not equal the argument.
+ let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+ def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+ def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ }
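Schematically, the custom inserter for these pseudos is expected to round, compare, and raise the inexact flag only when rounding changed the value (a sketch; register and label names are placeholders):

    frin    f1, f0        # round to integer
    fcmpu   cr7, f0, f1   # did rounding change the value?
    beq     cr7, done     # exact result: leave FPSCR alone
    mtfsb1  6             # set FPSCR[XX], the inexact-exception bit
    done: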
+
+ def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
"fsqrt $frD, $frB", FPSqrt,
- [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
"fsqrts $frD, $frB", FPSqrt,
- [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1119,29 +1200,29 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
+ []>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+ [(set f32:$frD, (fabs f32:$frB))]>;
def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+ [(set f64:$frD, (fabs f64:$frB))]>;
def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+ [(set f32:$frD, (fneg f32:$frB))]>;
def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+ [(set f64:$frD, (fneg f64:$frB))]>;
}
@@ -1161,6 +1242,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
"cror $CRD, $CRA, $CRB", BrCR,
[]>;
+let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
@@ -1178,6 +1260,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
+}
// XFX-Form instructions. Instructions that deal with SPRs.
//
@@ -1186,7 +1269,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1213,6 +1296,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
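
These two pseudos are produced by the spill and reload paths and are rewritten into real instructions later in this same patch (see lowerVRSAVESpilling/lowerVRSAVERestore in PPCRegisterInfo.cpp below). A minimal sketch of the producer side, assuming the usual storeRegToStackSlot shape and the addFrameReference helper from PPCInstrBuilder.h; the exact call site is an assumption for illustration:

    // Sketch: emitting SPILL_VRSAVE for a VRSAVE spill. addFrameReference
    // fills in the memri operand (offset + frame-index base register).
    NewMIs.push_back(addFrameReference(
        BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
            .addReg(SrcReg, getKillRegState(isKill)),
        FrameIdx));
    FuncInfo->setSpillsVRSAVE(); // recorded on PPCFunctionInfo (see below)
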
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1227,6 +1333,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
// instruction to keep the register allocator from becoming confused.
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1239,38 +1346,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-// Instructions to manipulate FPSCR. Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0,
- [(PPCmtfsb0 (i32 imm:$FM))]>,
+ "mtfsb0 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0,
- [(PPCmtfsb1 (i32 imm:$FM))]>,
+ "mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- // MTFSF does not actually produce an FP result. We pretend it copies
- // input reg B to the output. If we didn't do this it would look like the
- // instruction had no outputs (because we aren't modelling the FPSCR) and
- // it would be deleted.
- def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
- (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
- "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
- [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
- F8RC:$rT, F8RC:$FRB))]>,
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ "mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
"mffs $rT", IntMFFS,
- [(set F8RC:$rT, (PPCmffs))]>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def FADDrtz: AForm_2<63, 21,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ [(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
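
The FADDrtz pseudo above carries usesCustomInserter, so it is expanded after instruction selection in the target's EmitInstrWithCustomInserter hook. A sketch of that expansion, assuming the hook's usual locals (MI, BB, F, TII); it saves FPSCR, forces round-to-zero (RN = 0b01 via FPSCR bits 30/31), performs the add, then restores FPSCR through the new two-operand MTFSF:

    // Sketch of the custom-inserter case for PPC::FADDrtz.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl = MI->getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);     // save FPSCR
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); // RN bit 31 := 1
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); // RN bit 30 := 0
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
        .addReg(Src1).addReg(Src2);                         // the real add
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
        .addImm(1).addReg(MFFSReg);                         // restore FPSCR
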
@@ -1281,61 +1379,61 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
}
def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divw $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divwu $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhwu $rT, $rA, $rB", IntMulHWU,
- [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mullw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
let Defs = [CARRY] in {
def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"neg $rT, $rA", IntSimple,
- [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+ [(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -1347,43 +1445,41 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
- (fneg F8RC:$FRB))))]>;
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
- (fneg F4RC:$FRB))))]>;
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1392,50 +1488,50 @@ let Uses = [RM] in {
def FSELD : AForm_1<63, 23,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FSELS : AForm_1<63, 23,
(outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fadds $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
def FDIV : AForm_2<63, 18,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fdiv $FRT, $FRA, $FRB", FPDivD,
- [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
def FDIVS : AForm_2<59, 18,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fdivs $FRT, $FRA, $FRB", FPDivS,
- [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
def FMUL : AForm_3<63, 25,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
"fmul $FRT, $FRA, $FRC", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
def FMULS : AForm_3<59, 25,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
"fmuls $FRT, $FRA, $FRC", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fsub $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fsubs $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
@@ -1475,47 +1571,43 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not GPRC:$in),
- (NOR GPRC:$in, GPRC:$in)>;
+def NOT : Pat<(not i32:$in),
+ (NOR $in, $in)>;
// ADD an arbitrary immediate.
-def : Pat<(add GPRC:$in, imm:$imm),
- (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
// OR an arbitrary immediate.
-def : Pat<(or GPRC:$in, imm:$imm),
- (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// XOR an arbitrary immediate.
-def : Pat<(xor GPRC:$in, imm:$imm),
- (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// SUBFIC
-def : Pat<(sub immSExt16:$imm, GPRC:$in),
- (SUBFIC GPRC:$in, imm:$imm)>;
+def : Pat<(sub immSExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
// SHL/SRL
-def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
-def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
// ROTL
-def : Pat<(rotl GPRC:$in, GPRC:$sh),
- (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
-def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
// RLWNM
-def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
- (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
-def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
- (BL_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
- (BL_Darwin texternalsym:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
- (BL_SVR4 tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
- (BL_SVR4 texternalsym:$dst)>;
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
@@ -1538,28 +1630,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
- (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
- (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS GPRC:$in, tglobaladdr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS GPRC:$in, tconstpool:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS GPRC:$in, tjumptable:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS GPRC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
-def : Pat<(sra GPRC:$rS, GPRC:$rB),
- (SRAW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(srl GPRC:$rS, GPRC:$rB),
- (SRW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(shl GPRC:$rS, GPRC:$rB),
- (SLW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
def : Pat<(zextloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
@@ -1582,8 +1674,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fextend F4RC:$src)),
- (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 045b375dd8..ee18eadf6e 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills?
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms?
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -78,7 +88,10 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
+ SpillsVRSAVE(false),
LRStoreRequired(false),
MinReservedArea(0),
TailCallSPDelta(0),
@@ -109,9 +122,18 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
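
Taken together, these flags let frame finalization decide whether a scavenging spill slot must be reserved. A hedged sketch of the producer side; the hook name and placement are assumptions for illustration, only the accessors above come from the patch:

    // Sketch: a spill path recording its effects on PPCFunctionInfo.
    void noteSpill(MachineFunction &MF, const TargetRegisterClass *RC,
                   bool HasImmediateForm) {
      PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
      FuncInfo->setHasSpills();
      if (!HasImmediateForm)            // r+r-only spill, e.g. stvx
        FuncInfo->setHasNonRISpills();
      if (RC == &PPC::CRRCRegClass)
        FuncInfo->setSpillsCR();
      else if (RC == &PPC::VRSAVERCRegClass)
        FuncInfo->setSpillsVRSAVE();
    }
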
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index df245cc655..1d61a3a8ea 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,26 +46,8 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
@@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
- ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
-}
-
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
}
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
@@ -123,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // The naming here is inverted: The CSR_NoRegs_Altivec has the
+ // Altivec registers masked so that they're not saved and restored around
+ // instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ if (Subtarget.isDarwin())
+ return CSR_NoRegs_Darwin_RegMask;
+ return CSR_NoRegs_RegMask;
+}
+
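
A no-preserved mask is consumed as a register-mask operand on a call-like MachineInstr, telling the allocator that nothing is preserved across it. A one-line usage sketch; where the mask is attached is hypothetical:

    // Sketch: attach the mask to an instruction being built.
    static void addNoPreservedMask(MachineInstrBuilder &MIB,
                                   const PPCRegisterInfo &TRI) {
      MIB.addRegMask(TRI.getNoPreservedMask());
    }
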
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
- Reserved.set(PPC::R0);
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
Reserved.set(PPC::LR8);
@@ -139,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
- Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code will be in the general form
///
@@ -238,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
/// addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
///
-void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the instruction.
MachineInstr &MI = *II;
// Get the instruction's basic block.
@@ -271,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -302,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -354,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
/// stw rA, FI ; Store rA to the frame.
///
void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -380,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- if (SrcReg != PPC::CR0)
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
.addImm(0)
.addImm(31);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
- .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ .addReg(Reg, RegState::Kill),
FrameIndex);
// Discard the pseudo instruction.
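
The shift amount getEncodingValue(SrcReg) * 4 used above works because mfcr places CR0 in the top nibble of the word, with each following CR field four bits lower; rotating left by 4*enc moves field enc into CR0's slot, and rotating left by 32 - 4*enc undoes it. A standalone check of that arithmetic (plain C++, independent of LLVM):

    #include <cassert>
    #include <cstdint>

    // rlwinm's underlying operation is a 32-bit rotate left (before masking).
    static uint32_t rotl32(uint32_t V, unsigned N) {
      return N ? (V << N) | (V >> (32 - N)) : V;
    }

    int main() {
      // Suppose CR3 (encoding 3) holds the nibble 0xA; after mfcr it sits
      // 3 * 4 = 12 bits below the top of the word.
      uint32_t CRWord = 0xAu << (28 - 3 * 4);
      uint32_t Spilled = rotl32(CRWord, 3 * 4);      // spill: into CR0's slot
      assert(Spilled >> 28 == 0xA);
      assert(rotl32(Spilled, 32 - 3 * 4) == CRWord); // restore: back again
      return 0;
    }
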
@@ -397,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
}
void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -424,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
.addImm(31);
}
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(Reg);
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -494,19 +487,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
- lowerDynamicAlloc(II, SPAdj, RS);
+ lowerDynamicAlloc(II);
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -525,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::LWA:
case PPC::LD:
case PPC::STD:
- case PPC::STD_32:
isIXAddr = true;
break;
}
-
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -553,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -563,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a 16-bit immediate, so materialize the full
// offset in a register.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
- .addReg(SReg, RegState::Kill)
+ .addReg(SRegHi, RegState::Kill)
.addImm(Offset);
// Convert into indexed form of the instruction:
@@ -584,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
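
The lis/ori pair above reproduces any 32-bit offset: lis installs Offset >> 16 (an arithmetic shift, so the sign is preserved) in the high half, and ori ors in the zero-extended low 16 bits. A standalone check of the split (hypothetical helper, plain C++):

    #include <cassert>
    #include <cstdint>

    // Mirrors: lis SRegHi, Offset >> 16 ; ori SReg, SRegHi, Offset & 0xFFFF.
    static int32_t materializeOffset(int32_t Offset) {
      int32_t Hi = (int32_t)((uint32_t)(Offset >> 16) << 16); // lis
      int32_t Lo = Offset & 0xFFFF;                           // ori
      return Hi | Lo;
    }

    int main() {
      assert(materializeOffset(70000) == 70000);
      assert(materializeOffset(-70000) == -70000); // negative offsets work too
      assert(materializeOffset(-1) == -1);
      return 0;
    }
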
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9840666242..7e6683eeb2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
-#include <map>
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -27,7 +27,7 @@ class TargetInstrInfo;
class Type;
class PPCRegisterInfo : public PPCGenRegisterInfo {
- std::map<unsigned, unsigned> ImmToIdxMap;
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
public:
@@ -44,23 +44,33 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
-
- void lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 8ee9b1ec9f..57a25f5143 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -27,40 +27,40 @@ class PPCReg<string n> : Register<n> {
// GPR - One of the 32 32-bit general-purpose registers
class GPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// GP8 - One of the 32 64-bit general-purpose registers
class GP8<GPR SubReg, string n> : PPCReg<n> {
- field bits<5> Num = SubReg.Num;
+ let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
- field bits<10> Num = num;
+ let HWEncoding{9-0} = num;
}
// FPR - One of the 32 64-bit floating-point registers
class FPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// VR - One of the 32 128-bit vector registers
class VR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- field bits<3> Num = num;
+ let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// CRBIT - One of the 32 1-bit condition register fields
class CRBIT<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// General-purpose registers
@@ -86,6 +86,14 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
def CR0GT : CRBIT< 1, "1">;
@@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 18e4c07942..1b7150fd37 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -39,7 +39,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasQPX(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , HasLFIWAX(false)
+ , HasFPRND(false)
+ , HasFPCVT(false)
, HasISEL(false)
+ , HasPOPCNTD(false)
+ , HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 15885bd2df..d5dfa1e8e1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -78,7 +78,12 @@ protected:
bool HasQPX;
bool HasFSQRT;
bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -155,10 +160,15 @@ public:
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 5e9ad347d3..00037edafc 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -122,9 +122,8 @@ llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- // FIXME: PPC currently does not have custom popcnt lowering even though
- // there is hardware support. Once this is fixed, update this function
- // to reflect the real capabilities of the hardware.
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
return PSK_Software;
}
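
Passes consume this answer through the TargetTransformInfo analysis; with POPCNTD present, 32- and 64-bit population counts now report as fast hardware. A sketch of a typical client-side test (the helper itself is illustrative):

    // Sketch: prefer the ctpop intrinsic only when the target reports fast
    // hardware popcount at this width.
    static bool shouldUseCtpop(const TargetTransformInfo &TTI,
                               unsigned BitWidth) {
      return TTI.getPopcntSupport(BitWidth) ==
             TargetTransformInfo::PSK_FastHardware;
    }
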
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index b6763aa738..9a7902a82d 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -1,7 +1,6 @@
//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
TODO:
-* gpr0 allocation
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9fc31..0b01433cc9 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,8 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 5995b6f5e8..a266df535d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
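
Marking MUL on i64 as Expand makes legalization decompose 64-bit multiplies into 32-bit pieces rather than failing on an unsupported node. The decomposition rests on the usual schoolbook identity, checked standalone below (a sketch of the arithmetic, not the legalizer's code):

    #include <cassert>
    #include <cstdint>

    // a*b mod 2^64 from 32-bit halves: the aHi*bHi term shifts out entirely.
    static uint64_t mul64Via32(uint32_t ALo, uint32_t AHi,
                               uint32_t BLo, uint32_t BHi) {
      uint64_t Lo  = (uint64_t)ALo * BLo;
      uint64_t Mid = (uint64_t)ALo * BHi + (uint64_t)AHi * BLo;
      return Lo + (Mid << 32);
    }

    int main() {
      uint64_t A = 0x123456789ABCDEF0ull, B = 0x0FEDCBA987654321ull;
      assert(mul64Via32((uint32_t)A, (uint32_t)(A >> 32),
                        (uint32_t)B, (uint32_t)(B >> 32)) == A * B);
      return 0;
    }
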
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index a59c775272..e740348717 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 0000000000..0223ec8e4f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+ MachineFunctionInfo() {
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
+
+}
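
The constructor reads a string function attribute and parses it with StringRef::getAsInteger. On the producer side, a frontend would attach the attribute roughly as below; this is a sketch against the AttributeSet-era API of this tree, and the helper name is made up:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/ADT/StringExtras.h"

    // Sketch: tag a function so the AMDGPUMachineFunction constructor above
    // will parse ShaderType back out of it.
    static void tagShaderType(llvm::Function &F, unsigned Type) {
      llvm::AttrBuilder B;
      B.addAttribute("ShaderType", llvm::utostr(Type));
      F.addAttributes(llvm::AttributeSet::FunctionIndex,
                      llvm::AttributeSet::get(F.getContext(),
                                              llvm::AttributeSet::FunctionIndex,
                                              B));
    }
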
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 0000000000..21c8c51dae
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- AMDGPUMachineFunction.h - AMDGPU Machine Function Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+ static const char *ShaderTypeAttribute;
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
index b723433c16..dea43b874c 100644
--- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -17,6 +17,7 @@
#include "AMDGPU.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
@@ -40,13 +41,14 @@ typedef SmallVector<BBValuePair, 2> BBValueVector;
typedef SmallPtrSet<BasicBlock *, 8> BBSet;
-typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
-typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
// The name for newly created blocks.
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 0185747544..e7ea876e2a 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,7 +151,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
addPass(createAMDGPUCFGPreparationPass(*TM));
addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
addPass(&FinalizeMachineBundlesID);
} else {
addPass(createSILowerControlFlowPass(*TM));
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 0c7880d232..fa8f62de9c 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
- if (i == 2)
- break;
SDValue CstOffset;
- if (!Operand.getValueType().isVector() &&
- SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+ // Gather the other constant values
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
}
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
}
- break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 63c59e1cb5..8efba5846b 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen
AMDGPUFrameLowering.cpp
AMDGPUIndirectAddressing.cpp
AMDGPUMCInstLower.cpp
+ AMDGPUMachineFunction.cpp
AMDGPUSubtarget.cpp
AMDGPUStructurizeCFG.cpp
AMDGPUTargetMachine.cpp
@@ -34,6 +35,8 @@ add_llvm_target(R600CodeGen
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
AMDGPURegisterInfo.cpp
+ R600ControlFlowFinalizer.cpp
+ R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 4d3d3e7945..b7cdd7c8cd 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::None;
- DwarfUsesInlineInfoSection = false;
DwarfSectionOffsetDirective = ".offset";
}
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index d20716000d..6ef4d40934 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -66,8 +66,6 @@ private:
void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
- void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const;
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
@@ -103,7 +101,8 @@ enum InstrTypes {
INSTR_FC,
INSTR_NATIVE,
INSTR_VTX,
- INSTR_EXPORT
+ INSTR_EXPORT,
+ INSTR_CFALU
};
enum FCInstr {
@@ -140,9 +139,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (isTexOp(MI.getOpcode())) {
- EmitTexInstr(MI, Fixups, OS);
- } else if (isFCOp(MI.getOpcode())){
+ if (isFCOp(MI.getOpcode())){
EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::BUNDLE ||
@@ -150,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
return;
} else {
switch(MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
@@ -175,6 +176,77 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(InstWord2, OS);
break;
}
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V: {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++)
+ Offsets[i] = MI.getOperand(i + 2).getImm();
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
+ } else {
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
+ }
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ EmitByte(INSTR_TEX, OS);
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ break;
+ }
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz:
case AMDGPU::EG_ExportBuf:
@@ -184,7 +256,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS);
break;
}
-
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_CFALU, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
default:
EmitALUInstr(MI, Fixups, OS);
break;
@@ -334,99 +428,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
Emit(InlineConstant.i, OS);
}
-void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const {
-
- unsigned Opcode = MI.getOpcode();
- bool hasOffsets = (Opcode == AMDGPU::TEX_LD);
- unsigned OpOffset = hasOffsets ? 3 : 0;
- int64_t Resource = MI.getOperand(OpOffset + 2).getImm();
- int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
- int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
- unsigned srcSelect[4] = {0, 1, 2, 3};
-
- // Emit instruction type
- EmitByte(1, OS);
-
- // Emit instruction
- EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
-
- // Emit resource id
- EmitByte(Resource, OS);
-
- // Emit source register
- EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
-
- // XXX: Emit src isRelativeAddress
- EmitByte(0, OS);
-
- // Emit destination register
- EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
-
- // XXX: Emit dst isRealtiveAddress
- EmitByte(0, OS);
-
- // XXX: Emit dst select
- EmitByte(0, OS); // X
- EmitByte(1, OS); // Y
- EmitByte(2, OS); // Z
- EmitByte(3, OS); // W
-
- // XXX: Emit lod bias
- EmitByte(0, OS);
-
- // XXX: Emit coord types
- unsigned coordType[4] = {1, 1, 1, 1};
-
- if (TextureType == TEXTURE_RECT
- || TextureType == TEXTURE_SHADOWRECT) {
- coordType[ELEMENT_X] = 0;
- coordType[ELEMENT_Y] = 0;
- }
-
- if (TextureType == TEXTURE_1D_ARRAY
- || TextureType == TEXTURE_SHADOW1D_ARRAY) {
- if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
- coordType[ELEMENT_Y] = 0;
- } else {
- coordType[ELEMENT_Z] = 0;
- srcSelect[ELEMENT_Z] = ELEMENT_Y;
- }
- } else if (TextureType == TEXTURE_2D_ARRAY
- || TextureType == TEXTURE_SHADOW2D_ARRAY) {
- coordType[ELEMENT_Z] = 0;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(coordType[i], OS);
- }
-
- // XXX: Emit offsets
- if (hasOffsets)
- for (unsigned i = 2; i < 5; i++)
- EmitByte(MI.getOperand(i).getImm()<<1, OS);
- else
- EmitNullBytes(3, OS);
-
- // Emit sampler id
- EmitByte(Sampler, OS);
-
- // XXX:Emit source select
- if ((TextureType == TEXTURE_SHADOW1D
- || TextureType == TEXTURE_SHADOW2D
- || TextureType == TEXTURE_SHADOWRECT
- || TextureType == TEXTURE_SHADOW1D_ARRAY)
- && Opcode != AMDGPU::TEX_SAMPLE_C_L
- && Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
- srcSelect[ELEMENT_W] = ELEMENT_Z;
- }
-
- for (unsigned i = 0; i < 4; i++) {
- EmitByte(srcSelect[i], OS);
- }
-}
-
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
// Emit instruction type
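The TEX cases above pack the sampler id, per-channel source selects, and texel offsets into Word2 with fixed shifts. A minimal standalone sketch of that packing, assuming made-up field values; the bit positions mirror the shifts in the hunk above (and the TEX_WORD2 class added later in this patch):

    // Sketch of the Word2 packing in EncodeInstruction; the field
    // values below are hypothetical, the shift amounts come from the patch.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Sampler = 2;                 // SAMPLER_ID, bits 19-15
      uint32_t SrcSelect[4] = {0, 1, 2, 3}; // SRC_SEL_X/Y/Z/W, bits 22-20 up
      uint32_t Offsets[3] = {0, 0, 0};      // OFFSET_X/Y/Z, bits 4-0/9-5/14-10

      uint32_t Word2 = Sampler << 15 | SrcSelect[0] << 20 |
                       SrcSelect[1] << 23 | SrcSelect[2] << 26 |
                       SrcSelect[3] << 29 | Offsets[0] << 0 |
                       Offsets[1] << 5 | Offsets[2] << 10;

      std::printf("Word2 = 0x%08x\n", (unsigned)Word2);
      return 0;
    }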
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index e27abccbe1..5af83209a0 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -39,8 +39,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
- const MCSubtargetInfo &STI;
- MCContext &Ctx;
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -51,7 +49,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
const MCSubtargetInfo &sti, MCContext &ctx)
- : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+ : MCII(mcii), MRI(mri) { }
~SIMCCodeEmitter() { }
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 0000000000..bd87d741ec
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,264 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their address on the fly; it also sets the STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+ AluInstCount ++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::WHILE_LOOP:
+ MI->getOperand(0).setImm(Addr + 1);
+ break;
+ default:
+ MI->getOperand(0).setImm(Addr);
+ break;
+ }
+ }
+ void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+ const {
+ for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+ It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ case AMDGPU::KILLGT:
+ case AMDGPU::CF_ALU:
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(0);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb);
+ IfThenElseStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)
+ .addImm(1);
+ std::pair<unsigned, MachineInstr *> NewPair(CfCount, MIb);
+ IfThenElseStack.push_back(NewPair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CurrentStack--;
+ std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount + 1);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::PREDICATED_BREAK: {
+ CurrentStack--;
+ CfCount += 3;
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ .addImm(CfCount)
+ .addImm(1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::LOOP_BREAK))
+ .addImm(0);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount)
+ .addImm(1);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_CONTINUE))
+ .addImm(CfCount);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::STACK_SIZE))
+ .addImm(MaxStack);
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
+
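The finalizer above emits forward control-flow entries (CF_JUMP, CF_ELSE, WHILE_LOOP) with a placeholder address of 0 and patches them through CounterPropagateAddr once the target CfCount is known. A minimal sketch of that back-patching scheme, using illustrative types rather than the LLVM API (the real pass also biases WHILE_LOOP targets by +1):

    // Two-pass back-patching: record which entries need the address,
    // then patch them when the target index becomes known.
    #include <cstdio>
    #include <set>
    #include <vector>

    struct CFEntry { unsigned Opcode; unsigned Addr; };

    static void propagateAddr(std::vector<CFEntry> &Prog,
                              const std::set<unsigned> &Pending,
                              unsigned Addr) {
      for (unsigned Idx : Pending)
        Prog[Idx].Addr = Addr; // mirrors CounterPropagateAddr
    }

    int main() {
      std::vector<CFEntry> Prog;
      std::set<unsigned> LoopUsers;

      Prog.push_back({/*WHILE_LOOP*/ 6, 0}); // target not yet known
      LoopUsers.insert(0);
      Prog.push_back({/*CF_ALU*/ 8, 0});     // loop body clause
      unsigned CfCount = (unsigned)Prog.size();
      propagateAddr(Prog, LoopUsers, CfCount); // patch the placeholder

      for (const CFEntry &E : Prog)
        std::printf("inst %u -> addr %u\n", E.Opcode, E.Addr);
      return 0;
    }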
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 0000000000..7c7469a04b
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,253 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped into clauses, each of which
+/// can hold up to 128 ALU instructions; these instructions can access up to
+/// 4 prefetched lines of 16 registers from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned OccupiedDwords(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return 4;
+ case AMDGPU::KILL:
+ return 0;
+ default:
+ break;
+ }
+
+ if(TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return 4;
+
+ unsigned NumLiteral = 0;
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++NumLiteral;
+ }
+ return 1 + NumLiteral;
+ }
+
+ bool isALU(const MachineInstr *MI) const {
+ if (MI->getOpcode() == AMDGPU::KILLGT)
+ return false;
+ if (TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Register Idx, then Const value
+ std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
+ const {
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+ std::vector<std::pair<unsigned, unsigned> > Result;
+
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return Result;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const));
+ }
+ }
+ return Result;
+ }
+
+ std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+ // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+ // (See also R600ISelLowering.cpp)
+ // ConstIndex value is in [0, 4095].
+ return std::pair<unsigned, unsigned>(
+ ((Sel >> 2) - 512) >> 12, // KC_BANK
+ // Line Number of ConstIndex
+ // A line contains 16 constant registers; however, the KCX bank can lock
+ // two lines at a time, so we want an even line number.
+ // The line number could be retrieved with (>>4); using (>>5) << 1
+ // instead always generates an even number.
+ ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+ }
+
+ bool SubstituteKCacheBank(MachineInstr *MI,
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
+ assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned Sel = Consts[i].second;
+ unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+ unsigned KCacheIndex = Index * 4 + Chan;
+ const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+ if (CachedConsts.empty()) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[0] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts.size() == 1) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[1] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ return false;
+ }
+
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ switch(UsedKCache[i].first) {
+ case 0:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
+ break;
+ case 1:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
+ break;
+ default:
+ llvm_unreachable("Wrong Cache Line");
+ }
+ }
+ return true;
+ }
+
+ MachineBasicBlock::iterator
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ bool PushBeforeModifier = false;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isALU(I))
+ break;
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+ PushBeforeModifier = true;
+ AluInstCount ++;
+ continue;
+ }
+ if (TII->isALUInstr(I->getOpcode()) &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
+ AluInstCount += OccupiedDwords(I);
+ if (AluInstCount > 124)
+ break;
+ }
+ unsigned Opcode = PushBeforeModifier ?
+ AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+ .addImm(0) // ADDR
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+ .addImm(KCacheBanks.empty()?0:2) // KM0
+ .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+
+public:
+ R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ if (I->getOpcode() == AMDGPU::CF_ALU)
+ continue; // BB was already parsed
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+ if (isALU(I))
+ I = MakeALUClause(MBB, I);
+ else
+ ++I;
+ }
+ }
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Emit Clause Markers Pass";
+ }
+};
+
+char R600EmitClauseMarkersPass::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {
+ return new R600EmitClauseMarkersPass(TM);
+}
+
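getAccessedBankLine in the new pass decodes Sel = (512 + (kc_bank << 12) + ConstIndex) << 2 back into a (bank, even line) pair. A standalone sketch that checks the arithmetic on one hypothetical constant, bank 1 with ConstIndex 40 (which lives in the locked line pair 2-3):

    // Sketch of the Sel decode in getAccessedBankLine; the encode step
    // follows the comment in the patch, the sample values are made up.
    #include <cassert>
    #include <utility>

    static std::pair<unsigned, unsigned> accessedBankLine(unsigned Sel) {
      unsigned Bank = ((Sel >> 2) - 512) >> 12;
      // A line holds 16 constants, but the bank locks two lines at a
      // time, so round down to an even line number: (>>5) << 1.
      unsigned Line = ((((Sel >> 2) - 512) & 4095) >> 5) << 1;
      return std::pair<unsigned, unsigned>(Bank, Line);
    }

    int main() {
      unsigned Sel = (512 + (1u << 12) + 40) << 2; // bank 1, index 40
      std::pair<unsigned, unsigned> BL = accessedBankLine(Sel);
      assert(BL.first == 1 && BL.second == 2);
      return 0;
    }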
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index a73691dd3c..53e6e51dd2 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -58,7 +57,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
- setOperationAction(ISD::FPOW, MVT::f32, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
@@ -316,7 +314,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -918,15 +915,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue R600TargetLowering::LowerFPOW(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
- SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
- return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
-}
-
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 5cb4b912a1..2c09acb9af 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -59,7 +59,6 @@ private:
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index be3318a0b4..08650980fd 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert (Consts.size() <= 12 && "Too many operands in instructions group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
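fitsConstReadLimitations above lets an instruction group read at most two distinct constant halves: the (Consts[i] & ~3) | (Consts[i] & 2) mask collapses the X/Y and Z/W channels of a slot onto a single key each. A standalone restatement of the same rule, with hypothetical Sel values:

    // Same pairing check as R600InstrInfo::fitsConstReadLimitations,
    // lifted out of the class; the test values are illustrative only.
    #include <cassert>
    #include <vector>

    static bool fitsConstReadLimitations(const std::vector<unsigned> &Consts) {
      unsigned Pair1 = 0, Pair2 = 0;
      for (unsigned C : Consts) {
        unsigned Half = (C & ~3u) | (C & 2u); // X/Y and Z/W share a key
        if (!Pair1) { Pair1 = Half; continue; }
        if (Pair1 == Half) continue;
        if (!Pair2) { Pair2 = Half; continue; }
        if (Pair2 != Half) return false;
      }
      return true;
    }

    int main() {
      assert(fitsConstReadLimitations({0x40, 0x41}));        // one X/Y half
      assert(fitsConstReadLimitations({0x40, 0x42}));        // X/Y plus Z/W
      assert(!fitsConstReadLimitations({0x40, 0x42, 0x80})); // a third half
      return 0;
    }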
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index efe721c00c..bf9569e659 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -53,6 +53,9 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
/// \brief Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index c5fa3347dc..663b41a66d 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -234,6 +234,80 @@ class VTX_WORD1_GPR {
let Word1{31} = SRF_MODE_ALL;
}
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets. We currently handle this in R600MCCodeEmitter, but we may
@@ -277,9 +351,9 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName,
+ !strconcat(" ", opName,
"$clamp $dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -318,10 +392,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName,
+ !strconcat(" ", opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
- "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -356,10 +430,10 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(opName, "$clamp $dst$dst_rel, "
- "$src0_neg$src0$src0_sel$src0_rel, "
- "$src1_neg$src1$src1_sel$src1_rel, "
- "$src2_neg$src2$src2_sel$src2_rel, "
+ !strconcat(" ", opName, "$clamp $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -386,12 +460,32 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
InstR600 <inst,
- (outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
- !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
+ (outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+ !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
pattern,
- itin>{
- let Inst {10-0} = inst;
+ itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+
+ let COORD_TYPE_X = 0;
+ let COORD_TYPE_Y = 0;
+ let COORD_TYPE_Z = 0;
+ let COORD_TYPE_W = 0;
}
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -671,6 +765,167 @@ class ExportBufInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0;
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_WORD0 {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+"TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+"VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+
+def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
@@ -867,25 +1122,33 @@ def CNDGT_INT : R600_3OP <
def TEX_LD : R600_TEX <
0x03, "TEX_LD",
- [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+ imm:$SAMPLER_ID, imm:$textureTarget))]
> {
-let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
-let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget);
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
+ "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+ i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ i32imm:$textureTarget);
}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
0x04, "TEX_GET_TEXTURE_RESINFO",
- [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_H : R600_TEX <
0x07, "TEX_GET_GRADIENTS_H",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_V : R600_TEX <
0x08, "TEX_GET_GRADIENTS_V",
- [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SET_GRADIENTS_H : R600_TEX <
@@ -900,32 +1163,38 @@ def TEX_SET_GRADIENTS_V : R600_TEX <
def TEX_SAMPLE : R600_TEX <
0x10, "TEX_SAMPLE",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C : R600_TEX <
0x18, "TEX_SAMPLE_C",
- [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_L : R600_TEX <
0x11, "TEX_SAMPLE_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_L : R600_TEX <
0x19, "TEX_SAMPLE_C_L",
- [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_LB : R600_TEX <
0x12, "TEX_SAMPLE_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_LB : R600_TEX <
0x1A, "TEX_SAMPLE_C_LB",
- [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_G : R600_TEX <
@@ -1141,6 +1410,7 @@ let Predicates = [isR600] in {
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt R600_Reg32:$src),
@@ -1212,6 +1482,7 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
def : Pat<(fsqrt R600_Reg32:$src),
@@ -1540,13 +1811,14 @@ def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
-def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
def : SIN_PAT <SIN_cm>;
def : COS_PAT <COS_cm>;
@@ -1979,8 +2251,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns
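The CF_WORD0/CF_WORD1 classes above pin down the native control-flow encoding: ADDR occupies Word0 bits 23-0, CF_INST Word1 bits 29-22, with BARRIER at bit 31. A small sketch packing one hypothetical CF_JUMP entry (CF_INST 10, per the def above):

    // Packs one CF_JUMP the way CF_WORD0/CF_WORD1 lay out the fields;
    // the ADDR and POP_COUNT values are made up for illustration.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Addr = 5, PopCount = 1, CfInst = 10; // CF_JUMP

      uint32_t Word0 = Addr & 0xFFFFFF;       // ADDR, bits 23-0
      uint32_t Word1 = (PopCount & 7)         // POP_COUNT, bits 2-0
                     | (CfInst & 0xFF) << 22  // CF_INST, bits 29-22
                     | 1u << 31;              // BARRIER, bit 31

      uint64_t Inst = (uint64_t)Word1 << 32 | Word0;
      std::printf("CF_JUMP = 0x%016llx\n", (unsigned long long)Inst);
      return 0;
    }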
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 40aec833ea..018b403633 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -13,6 +13,6 @@
using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : MachineFunctionInfo() {
- memset(Outputs, 0, sizeof(Outputs));
- }
+ : AMDGPUMachineFunction(MF) { }
+
+
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 4b901f4bbc..99c1f91b09 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -14,19 +14,17 @@
#define R600MACHINEFUNCTIONINFO_H
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "AMDGPUMachineFunction.h"
#include <vector>
namespace llvm {
-class R600MachineFunctionInfo : public MachineFunctionInfo {
-
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
std::vector<unsigned> IndirectRegs;
- SDNode *Outputs[16];
};
} // End llvm namespace
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 19baef94c7..9074364bb3 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin
@@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
-class ConstPairs {
-private:
- unsigned XYPair;
- unsigned ZWPair;
-public:
- ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
- for (unsigned i = 0; i < 3; i++) {
- unsigned ReadConstChan = ReadConst[i] & 3;
- unsigned ReadConstIndex = ReadConst[i] & (~3);
- if (ReadConstChan < 2) {
- if (!XYPair) {
- XYPair = ReadConstIndex;
- }
- } else {
- if (!ZWPair) {
- ZWPair = ReadConstIndex;
- }
- }
- }
- }
-
- bool isCompatibleWith(const ConstPairs& CP) const {
- return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
- (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
- }
-};
-
-static
-const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
- unsigned ReadConsts[3] = {0, 0, 0};
- R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
- if (!TII->isALUInstr(MI.getOpcode()))
- return ConstPairs(ReadConsts);
-
- for (unsigned i = 0; i < 3; i++) {
- int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
- if (SrcIdx < 0)
- break;
- if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
- ReadConsts[i] =MI.getOperand(
- TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
- }
- return ConstPairs(ReadConsts);
-}
-
-bool
-R600SchedStrategy::isBundleable(const MachineInstr& MI) {
- const ConstPairs &MIPair = getPairs(TII, MI);
- for (unsigned i = 0; i < 4; i++) {
- if (!InstructionsGroupCandidate[i])
- continue;
- const ConstPairs &IGPair = getPairs(TII,
- *InstructionsGroupCandidate[i]->getInstr());
- if (!IGPair.isCompatibleWith(MIPair))
- return false;
- }
- return true;
-}
-
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
if (Q.empty())
return NULL;
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
It != E; ++It) {
SUnit *SU = *It;
- if (isBundleable(*SU->getInstr())) {
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
Q.erase(It);
return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
}
}
return NULL;
@@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+ InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
- InstructionsGroupCandidate[Chan] = SU;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
}
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index d74ff1e076..3d0367fd8e 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -98,7 +98,7 @@ public:
virtual void releaseBottomNode(SUnit *SU);
private:
- SUnit *InstructionsGroupCandidate[4];
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
@@ -112,7 +112,6 @@ private:
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
SUnit* pickOther(int QID);
- bool isBundleable(const MachineInstr& MI);
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
};
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index ce5994ca36..03f49761ea 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -43,6 +43,37 @@ foreach Index = 0-127 in {
Index>;
}
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW",
+ [!cast<Register>("KC0_"#Index#"_X"),
+ !cast<Register>("KC0_"#Index#"_Y"),
+ !cast<Register>("KC0_"#Index#"_Z"),
+ !cast<Register>("KC0_"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW",
+ [!cast<Register>("KC1_"#Index#"_X"),
+ !cast<Register>("KC1_"#Index#"_Y"),
+ !cast<Register>("KC1_"#Index#"_Z"),
+ !cast<Register>("KC1_"#Index#"_W")],
+ Index>;
+}
+
+
// Array Base Register holding input in FS
foreach Index = 448-480 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
@@ -80,6 +111,38 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X",
} // End isAllocatable = 0
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC0_X, R600_KC0_Y,
+ R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC1_X, R600_KC1_Y,
+ R600_KC1_Z, R600_KC1_W)>;
+
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127), AR_X)>;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 063f5faa63..6f0c307615 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
@@ -69,7 +74,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SETCC);
- setSchedulingPreference(Sched::Source);
+ setSchedulingPreference(Sched::RegPressure);
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -203,32 +208,23 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
- MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
- MachineBasicBlock::iterator I = MI;
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::SI_WQM:
- LowerSI_WQM(MI, *BB, I, MRI);
- break;
}
return BB;
}
-void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
-
- MI->eraseFromParent();
-}
-
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -488,22 +484,23 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDNode *Node = Op.getNode();
- int OpClass;
+ const TargetRegisterClass *OpClass;
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
- OpClass = Desc.OpInfo[Op.getResNo()].RegClass;
+ int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+ if (OpClassID == -1)
+ OpClass = getRegClassFor(Op.getSimpleValueType());
+ else
+ OpClass = TRI->getRegClass(OpClassID);
} else if (Node->getOpcode() == ISD::CopyFromReg) {
RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
- OpClass = MRI.getRegClass(Reg->getReg())->getID();
+ OpClass = MRI.getRegClass(Reg->getReg());
} else
return false;
- if (OpClass == -1)
- return false;
-
- return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass));
+ return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
}
/// \brief Make sure that we don't exceed the number of allowed scalars
@@ -547,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
unsigned NumDefs = Desc->getNumDefs();
unsigned NumOps = Desc->getNumOperands();
+ // Commuted opcode if available
+ int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+
+ assert(!DescRev || DescRev->getNumDefs() == NumDefs);
+ assert(!DescRev || DescRev->getNumOperands() == NumOps);
+
// e64 version if available, -1 otherwise
int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
@@ -599,41 +603,54 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
// Is this a VSrc or SSrc operand ?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
- if (!isVSrc(RegClass) && !isSSrc(RegClass)) {
+ if (isVSrc(RegClass) || isSSrc(RegClass)) {
+ // Try to fold the immediates
+ if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+ // Folding didn't work, so make sure we don't hit the SReg limit
+ ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ }
+ continue;
+ }
+
+ if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
- if (i == 1 && Desc->isCommutable() &&
- fitsRegClass(DAG, Ops[0], RegClass) &&
- foldImm(Ops[1], Immediate, ScalarSlotUsed)) {
+ unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
+ assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
- assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) ||
- isSSrc(Desc->OpInfo[NumDefs].RegClass));
+ // Test if it makes sense to swap operands
+ if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[1], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
// Swap commutable operands
SDValue Tmp = Ops[1];
Ops[1] = Ops[0];
Ops[0] = Tmp;
- } else if (DescE64 && !Immediate) {
- // Test if it makes sense to switch to e64 encoding
-
- RegClass = DescE64->OpInfo[Op].RegClass;
- int32_t TmpImm = -1;
- if ((isVSrc(RegClass) || isSSrc(RegClass)) &&
- foldImm(Ops[i], TmpImm, ScalarSlotUsed)) {
-
- Immediate = -1;
- Promote2e64 = true;
- Desc = DescE64;
- DescE64 = 0;
- }
+ Desc = DescRev;
+ DescRev = 0;
+ continue;
}
- continue;
}
- // Try to fold the immediates
- if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't worked, make sure we don't hit the SReg limit
- ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ if (DescE64 && !Immediate) {
+
+ // Test if it makes sense to switch to e64 encoding
+ unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
+ if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
+ continue;
+
+ int32_t TmpImm = -1;
+ if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[i], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+ // Switch to e64 encoding
+ Immediate = -1;
+ Promote2e64 = true;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
}
}
@@ -647,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
Ops.push_back(Node->getOperand(i));
- // Either create a complete new or update the current instruction
- if (Promote2e64)
- return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(),
- Node->getVTList(), Ops.data(), Ops.size());
- else
- return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ // Create a complete new instruction
+ return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
+ Node->getVTList(), Ops.data(), Ops.size());
}
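The rewritten folding loop above swaps src0/src1 and switches to the commuted descriptor (DescRev) when the second source is a foldable immediate, or when it only fits the other operand's register class. A toy sketch of that decision with illustrative state, not the SelectionDAG API; the "_rev" suffix is only a placeholder for the commuted opcode name:

    // Decide whether to commute: src0 is assumed to fit the constrained
    // class already (fitsRegClass(DAG, Ops[0], RegClass) in the patch).
    #include <cstdio>
    #include <string>
    #include <utility>

    struct Operand { bool IsImm; bool FitsConstrainedClass; };

    static bool shouldSwap(const Operand &Src1) {
      // foldImm(Ops[1], ...) || (!fitsRegClass(Ops[1], RegClass) && ...)
      return Src1.IsImm || !Src1.FitsConstrainedClass;
    }

    int main() {
      Operand Src0 = {false, true}; // register, fits the strict class
      Operand Src1 = {true, false}; // immediate, wants the flexible slot
      std::string Opcode = "v_op";  // hypothetical opcode name
      if (shouldSwap(Src1)) {
        std::swap(Src0, Src1);
        Opcode += "_rev";           // switch to the commuted descriptor
      }
      std::printf("%s, src0 imm: %d\n", Opcode.c_str(), (int)Src0.IsImm);
      return 0;
    }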
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 0411565ee3..5ad2f40f0f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -24,9 +24,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
const SIInstrInfo * TII;
const TargetRegisterInfo * TRI;
- void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
-
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -48,6 +45,7 @@ public:
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 67fbdf7be1..98bd3dbb66 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) {
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
-bool SIInsertWaits::unorderedDefines(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- if (TSFlags & SIInstrFlags::LGKM_CNT)
- return true;
-
- if (TSFlags & SIInstrFlags::EXP_CNT)
- return ExpInstrTypesSeen == 3;
-
- return false;
-}
-
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
- bool UnorderedDefines = unorderedDefines(MI);
Counters Result = ZeroCounts;
// For each register affected by this
@@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
- if (UnorderedDefines)
- increaseCounters(Result, DefinedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
}
if (Op.isUse())
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index de2373b11a..0bfcef562f 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -65,6 +65,26 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opcode;
const int16_t *SubIndices;
+ if (AMDGPU::M0 == DestReg) {
+ // Check if M0 isn't already set to this value
+ for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+ I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+ if (!I->definesRegister(AMDGPU::M0))
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+ break;
+
+ if (!I->readsRegister(SrcReg))
+ break;
+
+ // The copy isn't necessary
+ return;
+ }
+ }
+
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
@@ -138,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ return NewOpc;
+
+ return Opcode;
+}
+
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
@@ -145,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
!MI->getOperand(2).isReg())
return 0;
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+
+ return MI;
}
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
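A standalone sketch of the redundant-copy check added to copyPhysReg above; the struct, opcode strings, and register number are illustrative stand-ins for the MachineInstr API, not LLVM code:

#include <cassert>
#include <string>
#include <vector>

struct Inst {
  std::string Op; // "COPY", "S_MOV_B32", or any other opcode name
  int Def, Use;   // register defined / register read
};

constexpr int M0 = 124; // illustrative register number, not a real encoding

static bool copyIsRedundant(const std::vector<Inst> &Block, size_t Pos,
                            int SrcReg) {
  for (size_t I = Pos; I-- > 0;) {                // reverse scan, like MBB.rend()
    if (Block[I].Def != M0)
      continue;                                   // keep searching upward
    if (Block[I].Op != "COPY" && Block[I].Op != "S_MOV_B32")
      return false;                               // M0 clobbered some other way
    return Block[I].Use == SrcReg;                // same value already in M0?
  }
  return false;                                   // no earlier definition of M0
}

int main() {
  std::vector<Inst> B = {{"S_MOV_B32", M0, 7}, {"V_ADD_F32", 3, 4}};
  assert(copyIsRedundant(B, B.size(), 7));  // M0 already holds register 7
  assert(!copyIsRedundant(B, B.size(), 8)); // different source: copy needed
  return 0;
}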
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 5789af5d21..d4e60e5086 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -35,6 +35,8 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
+ unsigned commuteOpcode(unsigned Opcode) const;
+
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI=false) const;
@@ -76,6 +78,8 @@ public:
namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
+ int getCommuteRev(uint16_t Opcode);
+ int getCommuteOrig(uint16_t Opcode);
} // End namespace AMDGPU
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 260c651dd4..617f0b871c 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
opName#" $dst, $src0", pattern
>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
- (ins SReg_64:$sbase, i32imm:$offset),
+ (ins baseClass:$sbase, i32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
def _SGPR : SMRD <
op, 0, (outs dstClass:$dst),
- (ins SReg_64:$sbase, SReg_32:$soff),
+ (ins baseClass:$sbase, SReg_32:$soff),
asm#" $dst, $sbase, $soff", []
>;
}
@@ -137,6 +138,11 @@ class VOP <string opName> {
string OpName = opName;
}
+class VOP2_REV <string revOp, bit isOrig> {
+ string RevOp = revOp;
+ bit IsOrig = isOrig;
+}
+
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
string opName, list<dag> pattern> {
@@ -165,11 +171,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> {
+ string opName, list<dag> pattern, string revOp> {
def _e32 : VOP2 <
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>;
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -178,23 +184,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
- >, VOP <opName> {
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let SRC2 = SIOperand.ZERO;
}
}
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern>
- : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern>
- : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
-multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName> {
def _e32 : VOP2 <
op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
opName#"_e32 $dst, $src0, $src1", pattern
- >, VOP <opName>;
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
def _e64 : VOP3b <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -203,7 +212,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
i32imm:$abs, i32imm:$clamp,
i32imm:$omod, i32imm:$neg),
opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
- >, VOP <opName> {
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let SRC2 = SIOperand.ZERO;
/* The VOP2 variant puts the carry out into VCC; the VOP3 variant
can write it into any SGPR. We currently don't use the carry out,
@@ -304,7 +313,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
op,
(outs VReg_128:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
SReg_256:$srsrc, SReg_128:$ssamp),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
@@ -326,4 +335,22 @@ def getVOPe64 : InstrMapping {
let ValueCols = [["8"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+// Maps a commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 0ab9e4ec0c..4f734f9124 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT
//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
@@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
-//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
-//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
} // mayLoad = 1
@@ -790,13 +804,13 @@ let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
[(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
>;
-} // End isCommutable = 1
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
[(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
+} // End isCommutable = 1
-defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
let isCommutable = 1 in {
@@ -834,16 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
-} // End isCommutable = 1
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
-defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
-defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
-defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
-let isCommutable = 1 in {
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
[(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
@@ -864,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
-let Defs = [VCC] in { // Carry-out goes to VCC
-let isCommutable = 1 in {
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
[(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>;
-} // End isCommutable = 1
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
[(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
-defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>;
let Uses = [VCC] in { // Carry-out comes from VCC
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
} // End Uses = [VCC]
-} // End Defs = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
+
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
@@ -955,14 +972,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // End isCommutable = 1
+
def : Pat <
(mul VSrc_32:$src0, VReg_32:$src1),
(V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
>;
-def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+def : Pat <
+ (mulhu VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhs VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
@@ -1051,17 +1085,6 @@ def LOAD_CONST : AMDGPUShaderInst <
[(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
>;
-let usesCustomInserter = 1 in {
-
-def SI_WQM : InstSI <
- (outs),
- (ins),
- "SI_WQM",
- [(int_SI_wqm)]
->;
-
-} // end usesCustomInserter
-
// SI Pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
@@ -1133,6 +1156,31 @@ def SI_KILL : InstSI <
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
@@ -1165,10 +1213,9 @@ def : Pat <
/* int_SI_sample for simple 1D texture lookup */
def : Pat <
- (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr),
+ (int_SI_sample imm:$writemask, VReg_32:$addr,
SReg_256:$rsrc, SReg_128:$sampler, imm),
- (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
- (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1176,8 +1223,7 @@ class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
ValueType addr_type> : Pat <
(name imm:$writemask, (addr_type addr_class:$addr),
SReg_256:$rsrc, SReg_128:$sampler, imm),
- (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
- (EXTRACT_SUBREG addr_class:$addr, sub0),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1185,8 +1231,7 @@ class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
ValueType addr_type> : Pat <
(name imm:$writemask, (addr_type addr_class:$addr),
SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
- (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0,
- (EXTRACT_SUBREG addr_class:$addr, sub0),
+ (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1194,8 +1239,7 @@ class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
ValueType addr_type> : Pat <
(name imm:$writemask, (addr_type addr_class:$addr),
SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
- (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
- (EXTRACT_SUBREG addr_class:$addr, sub0),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1203,8 +1247,7 @@ class SampleShadowPattern<Intrinsic name, MIMG opcode,
RegisterClass addr_class, ValueType addr_type> : Pat <
(name imm:$writemask, (addr_type addr_class:$addr),
SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
- (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
- (EXTRACT_SUBREG addr_class:$addr, sub0),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1212,8 +1255,7 @@ class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
RegisterClass addr_class, ValueType addr_type> : Pat <
(name imm:$writemask, (addr_type addr_class:$addr),
SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
- (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
- (EXTRACT_SUBREG addr_class:$addr, sub0),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
SReg_256:$rsrc, SReg_128:$sampler)
>;
@@ -1241,22 +1283,83 @@ defm : SamplePatterns<VReg_128, v4i32>;
defm : SamplePatterns<VReg_256, v8i32>;
defm : SamplePatterns<VReg_512, v16i32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1340,8 +1443,7 @@ def : Pat <
/********** ================== **********/
/* llvm.AMDGPU.pow */
-/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
def : Pat <
(int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
@@ -1389,6 +1491,24 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
>;
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
@@ -1426,4 +1546,62 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
+/********** =============== **********/
+/********** Conditions **********/
+/********** =============== **********/
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETO)),
+ (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+>;
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETUO)),
+ (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+>;
+
} // End isSI predicate
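A minimal model of what the SI_INDIRECT_SRC patterns above compute: the constant part of the index folds into the subregister offset, while the variable part is supplied through M0 at run time (V_MOVRELS_B32 semantics). The array representation is an illustrative stand-in for a vector register:

#include <cassert>
#include <vector>

// Hardware reads VGPR[sub0 + Off + M0]; with the vector modelled as an
// array this is simply Vec[Off + M0].
static float indirectSrc(const std::vector<float> &Vec, unsigned M0,
                         unsigned Off) {
  return Vec.at(Off + M0);
}

int main() {
  std::vector<float> V = {1.0f, 2.0f, 4.0f, 8.0f}; // a v4f32 in a VReg_128
  assert(indirectSrc(V, /*M0=*/2, /*Off=*/1) == 8.0f); // element 3
  assert(indirectSrc(V, /*M0=*/0, /*Off=*/0) == 1.0f); // element 0
  return 0;
}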
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 7c23d1706b..0af378edfe 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -16,12 +16,10 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- /* XXX: We may need a seperate intrinsic here for loading integer values */
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
- def int_SI_wqm : Intrinsic <[], [], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
@@ -29,8 +27,8 @@ let TargetPrefix = "SI", isTarget = 1 in {
/* Interpolation Intrinsics */
- def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
- def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>;
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
/* Control flow Intrinsics */
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index b215aa22db..2b60eb9fb3 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -66,6 +66,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
+ const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -84,9 +85,14 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -191,7 +197,8 @@ void SILowerControlFlowPass::Else(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src = MI.getOperand(1).getReg();
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
.addReg(Src); // Saved EXEC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
@@ -302,9 +309,108 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant of the index into VCC (lower 32 bits); this is also the loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just-read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveKill = false;
+ bool NeedWQM = false;
unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -363,9 +469,33 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_BRANCH:
Branch(MI);
break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
+
+ case AMDGPU::V_INTERP_P1_F32:
+ case AMDGPU::V_INTERP_P2_F32:
+ case AMDGPU::V_INTERP_MOV_F32:
+ NeedWQM = true;
+ break;
+
}
}
}
+ if (NeedWQM) {
+ MachineBasicBlock &MBB = MF.front();
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ }
+
return true;
}
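For a divergent VGPR index, LoadM0 above emits a so-called waterfall loop: read the index from the first active lane, execute the MovRel for every lane sharing that value, mask those lanes out of EXEC, and branch back while any lane remains. A self-contained simulation of the trip count under these assumptions (the wavefront is modelled as a plain bitmask; not LLVM code):

#include <cassert>
#include <cstdint>
#include <vector>

// One trip handles every lane whose index equals the value read from the
// first still-active lane, so the loop runs once per distinct index value.
static int countTrips(const std::vector<uint32_t> &Idx) {
  uint64_t Exec = (Idx.size() >= 64) ? ~0ull : ((1ull << Idx.size()) - 1);
  int Trips = 0;
  while (Exec) {                                 // S_CBRANCH_EXECNZ
    unsigned First = 0;                          // find the first active lane
    while (!((Exec >> First) & 1))
      ++First;
    uint32_t M0 = Idx[First];                    // V_READFIRSTLANE_B32
    uint64_t Same = 0;                           // V_CMP_EQ_U32 per lane
    for (unsigned L = 0; L < Idx.size(); ++L)
      if (((Exec >> L) & 1) && Idx[L] == M0)
        Same |= 1ull << L;
    // S_AND_SAVEEXEC_B64 runs the MovRel for the matching lanes only;
    // S_XOR_B64 then drops them from the remaining work set.
    Exec &= ~Same;
    ++Trips;
  }
  return Trips;                                  // EXEC is restored afterwards
}

int main() {
  assert(countTrips({5, 5, 5, 5}) == 1);  // uniform index: a single pass
  assert(countTrips({0, 1, 2, 3}) == 4);  // fully divergent: one per value
  assert(countTrips({7, 9, 7, 9}) == 2);  // two distinct values
  return 0;
}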
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index 1a4e4cbbbb..ee0e30755f 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,25 +10,9 @@
#include "SIMachineFunctionInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
using namespace llvm;
-const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType";
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
- : MachineFunctionInfo(),
- ShaderType(0),
- PSInputAddr(0) {
-
- AttributeSet Set = MF.getFunction()->getAttributes();
- Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
- ShaderTypeAttribute);
-
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType))
- llvm_unreachable("Can't parse shader type!");
- }
-}
+ : AMDGPUMachineFunction(MF),
+ PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 91a809b124..6da9f7f9a1 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -15,18 +15,15 @@
#ifndef SIMACHINEFUNCTIONINFO_H_
#define SIMACHINEFUNCTIONINFO_H_
-#include "llvm/CodeGen/MachineFunction.h"
+#include "AMDGPUMachineFunction.h"
namespace llvm {
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
-class SIMachineFunctionInfo : public MachineFunctionInfo {
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
public:
- static const char *ShaderTypeAttribute;
-
SIMachineFunctionInfo(const MachineFunction &MF);
- unsigned ShaderType;
unsigned PSInputAddr;
};
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 88275c523f..99278ae8dc 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -30,6 +30,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ return RC->getNumRegs();
+}
+
const TargetRegisterClass *
SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
switch (rc->getID()) {
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 40171e4450..caec228413 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -31,6 +31,9 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// \param RC is an AMDIL reg class.
///
/// \returns the SI register class that is equivalent to \p RC.
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 3dcad506d2..4f14931a9c 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes: can have either an immediate or a register
@@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
-def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
-def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 90b698d507..c3810b2a4d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -182,11 +182,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
def rr : F3_1<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
def ri : F3_2<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>;
}
/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -243,10 +243,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
"!FpMOVD $src, $dst", []>;
def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpNEGD $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpABSD $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -257,19 +257,16 @@ let Uses = [ICC], usesCustomInserter = 1 in {
def SELECT_CC_Int_ICC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
- [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_ICC PSEUDO!",
- [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
- [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
}
let usesCustomInserter = 1, Uses = [FCC] in {
@@ -277,19 +274,16 @@ let usesCustomInserter = 1, Uses = [FCC] in {
def SELECT_CC_Int_FCC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_FCC PSEUDO!",
- [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_FCC PSEUDO!",
- [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
- [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
}
@@ -309,111 +303,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
def LDSBrr : F3_1<3, 0b001001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
def LDSBri : F3_2<3, 0b001001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
def LDSHrr : F3_1<3, 0b001010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
def LDSHri : F3_2<3, 0b001010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
def LDUBrr : F3_1<3, 0b000001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
def LDUBri : F3_2<3, 0b000001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
def LDUHrr : F3_1<3, 0b000010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
def LDUHri : F3_2<3, 0b000010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
def LDrr : F3_1<3, 0b000000,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set i32:$dst, (load ADDRrr:$addr))]>;
def LDri : F3_2<3, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+ [(set i32:$dst, (load ADDRri:$addr))]>;
// Section B.2 - Load Floating-point Instructions, p. 92
def LDFrr : F3_1<3, 0b100000,
(outs FPRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f32:$dst, (load ADDRrr:$addr))]>;
def LDFri : F3_2<3, 0b100000,
(outs FPRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f32:$dst, (load ADDRri:$addr))]>;
def LDDFrr : F3_1<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMrr:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f64:$dst, (load ADDRrr:$addr))]>;
def LDDFri : F3_2<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMri:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f64:$dst, (load ADDRri:$addr))]>;
// Section B.4 - Store Integer Instructions, p. 95
def STBrr : F3_1<3, 0b000101,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
def STBri : F3_2<3, 0b000101,
(outs), (ins MEMri:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRri:$addr)]>;
def STHrr : F3_1<3, 0b000110,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
def STHri : F3_2<3, 0b000110,
(outs), (ins MEMri:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRri:$addr)]>;
def STrr : F3_1<3, 0b000100,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRrr:$addr)]>;
+ [(store i32:$src, ADDRrr:$addr)]>;
def STri : F3_2<3, 0b000100,
(outs), (ins MEMri:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRri:$addr)]>;
+ [(store i32:$src, ADDRri:$addr)]>;
// Section B.5 - Store Floating-point Instructions, p. 97
def STFrr : F3_1<3, 0b100100,
(outs), (ins MEMrr:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRrr:$addr)]>;
+ [(store f32:$src, ADDRrr:$addr)]>;
def STFri : F3_2<3, 0b100100,
(outs), (ins MEMri:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRri:$addr)]>;
+ [(store f32:$src, ADDRri:$addr)]>;
def STDFrr : F3_1<3, 0b100111,
(outs), (ins MEMrr:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRrr:$addr)]>;
+ [(store f64:$src, ADDRrr:$addr)]>;
def STDFri : F3_2<3, 0b100111,
(outs), (ins MEMri:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRri:$addr)]>;
+ [(store f64:$src, ADDRri:$addr)]>;
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
(outs IntRegs:$dst), (ins i32imm:$src),
"sethi $src, $dst",
- [(set IntRegs:$dst, SETHIimm:$src)]>;
+ [(set i32:$dst, SETHIimm:$src)]>;
// Section B.10 - NOP Instruction, p. 105
// (It's a special case of SETHI)
@@ -426,7 +420,7 @@ defm AND : F3_12<"and", 0b000001, and>;
def ANDNrr : F3_1<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"andn $b, $c, $dst",
- [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
def ANDNri : F3_2<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"andn $b, $c, $dst", []>;
@@ -436,7 +430,7 @@ defm OR : F3_12<"or", 0b000010, or>;
def ORNrr : F3_1<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"orn $b, $c, $dst",
- [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
def ORNri : F3_2<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"orn $b, $c, $dst", []>;
@@ -445,7 +439,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>;
def XNORrr : F3_1<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"xnor $b, $c, $dst",
- [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+ [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"xnor $b, $c, $dst", []>;
@@ -462,7 +456,7 @@ defm ADD : F3_12<"add", 0b000000, add>;
def LEA_ADDri : F3_2<2, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"add ${addr:arith}, $dst",
- [(set IntRegs:$dst, ADDRri:$addr)]>;
+ [(set i32:$dst, ADDRri:$addr)]>;
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
@@ -603,11 +597,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010,
def FSTOD : F3_3<2, 0b110100, 0b011001001,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fstod $src, $dst",
- [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+ [(set f64:$dst, (fextend f32:$src))]>;
def FDTOS : F3_3<2, 0b110100, 0b011000110,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtos $src, $dst",
- [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+ [(set f32:$dst, (fround f64:$src))]>;
// Floating-point Move Instructions, p. 144
def FMOVS : F3_3<2, 0b110100, 0b000000001,
@@ -616,22 +610,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001,
def FNEGS : F3_3<2, 0b110100, 0b000000101,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fnegs $src, $dst",
- [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+ [(set f32:$dst, (fneg f32:$src))]>;
def FABSS : F3_3<2, 0b110100, 0b000001001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fabss $src, $dst",
- [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+ [(set f32:$dst, (fabs f32:$src))]>;
// Floating-point Square Root Instructions, p.145
def FSQRTS : F3_3<2, 0b110100, 0b000101001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fsqrts $src, $dst",
- [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+ [(set f32:$dst, (fsqrt f32:$src))]>;
def FSQRTD : F3_3<2, 0b110100, 0b000101010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fsqrtd $src, $dst",
- [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+ [(set f64:$dst, (fsqrt f64:$src))]>;
@@ -639,42 +633,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010,
def FADDS : F3_3<2, 0b110100, 0b001000001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fadds $src1, $src2, $dst",
- [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
def FADDD : F3_3<2, 0b110100, 0b001000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"faddd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
def FSUBS : F3_3<2, 0b110100, 0b001000101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsubs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
def FSUBD : F3_3<2, 0b110100, 0b001000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fsubd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
// Floating-point Multiply and Divide Instructions, p. 147
def FMULS : F3_3<2, 0b110100, 0b001001001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fmuls $src1, $src2, $dst",
- [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
- (fextend FPRegs:$src2)))]>;
+ [(set f64:$dst, (fmul (fextend f32:$src1),
+ (fextend f32:$src2)))]>;
def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fdivs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
def FDIVD : F3_3<2, 0b110100, 0b001001110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fdivd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
// Floating-point Compare Instructions, p. 148
// Note: the 2nd template arg is different for these instructions.
@@ -685,11 +679,11 @@ let Defs = [FCC] in {
def FCMPS : F3_3<2, 0b110101, 0b001010001,
(outs), (ins FPRegs:$src1, FPRegs:$src2),
"fcmps $src1, $src2\n\tnop",
- [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ [(SPcmpfcc f32:$src1, f32:$src2)]>;
def FCMPD : F3_3<2, 0b110101, 0b001010010,
(outs), (ins DFPRegs:$src1, DFPRegs:$src2),
"fcmpd $src1, $src2\n\tnop",
- [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+ [(SPcmpfcc f64:$src1, f64:$src2)]>;
}
//===----------------------------------------------------------------------===//
@@ -704,52 +698,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in {
def MOVICCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>;
def MOVICCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def MOVFCCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>;
def MOVFCCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [ICC] in {
def FMOVS_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %icc, $F, $dst",
- [(set FPRegs:$dst,
- (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst,
+ (SPselecticc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %icc, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def FMOVS_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %fcc0, $F, $dst",
- [(set FPRegs:$dst,
- (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %fcc0, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>;
}
}
@@ -762,11 +749,11 @@ let Predicates = [HasV9] in {
def FNEGD : F3_3<2, 0b110100, 0b000000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fnegd $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FABSD : F3_3<2, 0b110100, 0b000001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fabsd $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
@@ -774,8 +761,8 @@ let Predicates = [HasV9] in {
def POPCrr : F3_1<2, 0b101110,
(outs IntRegs:$dst), (ins IntRegs:$src),
"popc $src, $dst", []>, Requires<[HasV9]>;
-def : Pat<(ctpop IntRegs:$src),
- (POPCrr (SLLri IntRegs:$src, 0))>;
+def : Pat<(ctpop i32:$src),
+ (POPCrr (SLLri $src, 0))>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -789,10 +776,10 @@ def : Pat<(i32 imm:$val),
(ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
// subc
-def : Pat<(subc IntRegs:$b, IntRegs:$c),
- (SUBCCrr IntRegs:$b, IntRegs:$c)>;
-def : Pat<(subc IntRegs:$b, simm13:$val),
- (SUBCCri IntRegs:$b, imm:$val)>;
+def : Pat<(subc i32:$b, i32:$c),
+ (SUBCCrr $b, $c)>;
+def : Pat<(subc i32:$b, simm13:$val),
+ (SUBCCri $b, imm:$val)>;
// Global addresses, constant pool entries
def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
@@ -801,10 +788,10 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
// Add reg, lo. This is used when taking the addr of a global/constpool entry.
-def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
- (ADDri IntRegs:$r, tglobaladdr:$in)>;
-def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
- (ADDri IntRegs:$r, tconstpool:$in)>;
+def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
+ (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
+ (ADDri $r, tconstpool:$in)>;
// Calls:
def : Pat<(call tglobaladdr:$dst),
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 357879bf6c..b53a1ed095 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index c6cafe59eb..ee88ce77c0 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -610,6 +610,9 @@ struct StringComparator {
// Provided for compatibility with MSVC's debug mode.
bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
};
}
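A small standalone illustration of why the extra overload is needed: MSVC's debug-mode STL also invokes the comparator on two table elements when verifying the ordering, so a binary search over a sorted const char* table requires all three signatures. std::string stands in for StringRef here:

#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>
#include <string>

struct StringComparator {
  bool operator()(const std::string &L, const char *R) const { return L < R; }
  bool operator()(const char *L, const std::string &R) const {
    return R.compare(L) > 0;          // true iff L sorts before R
  }
  bool operator()(const char *L, const char *R) const {
    return std::strcmp(L, R) < 0;     // the overload the patch adds
  }
};

int main() {
  static const char *const Names[] = {"cos", "exp", "sin", "sqrt"};
  const char *const *I =
      std::lower_bound(std::begin(Names), std::end(Names),
                       std::string("sin"), StringComparator());
  assert(I != std::end(Names) && std::strcmp(*I, "sin") == 0);
  return 0;
}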
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 7d8b49cdf2..e7282519d5 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
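Hand-expanding one RESET_OPTION use makes the macro's effect concrete. The types below are simplified stand-ins for Function attributes and TargetOptions, not the real LLVM API; the real macro reads string attributes of the form "option"="true":

#include <cassert>
#include <map>
#include <string>

struct TargetOptions { bool UnsafeFPMath = false; };

using AttrMap = std::map<std::string, std::string>;

// Hand-expanded RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"):
static void resetUnsafeFPMath(const AttrMap &FnAttrs, TargetOptions &TO) {
  AttrMap::const_iterator It = FnAttrs.find("unsafe-fp-math");
  if (It != FnAttrs.end())            // only if the attribute is present
    TO.UnsafeFPMath = (It->second == "true");
}

int main() {
  TargetOptions TO;
  resetUnsafeFPMath({{"unsafe-fp-math", "true"}}, TO);
  assert(TO.UnsafeFPMath);
  resetUnsafeFPMath({{"unsafe-fp-math", "false"}}, TO);
  assert(!TO.UnsafeFPMath);           // the per-function override wins
  return 0;
}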
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ee5c2b2bfd..75d26f55c3 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -58,10 +58,15 @@ private:
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+ SMLoc StartLoc);
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
+ unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
SmallString<64> &Err);
@@ -170,30 +175,33 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc OffsetOfLoc;
bool AddressOf;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -231,11 +239,6 @@ struct X86Operand : public MCParsedAsmOperand {
return Imm.Val;
}
- bool needAsmRewrite() const {
- assert(Kind == Immediate && "Invalid access!");
- return Imm.NeedAsmRewrite;
- }
-
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -332,11 +335,6 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
- unsigned getMemSize() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.Size;
- }
-
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
@@ -345,11 +343,6 @@ struct X86Operand : public MCParsedAsmOperand {
return AddressOf;
}
- bool needSizeDirective() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.NeedSizeDir;
- }
-
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -485,17 +478,15 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
- bool NeedRewrite = true){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
- Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -503,7 +494,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -512,7 +502,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
// We should never just have a displacement; that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -527,7 +517,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -711,8 +700,8 @@ class IntelBracExprStateMachine {
bool isPlus;
public:
- IntelBracExprStateMachine(MCAsmParser &parser) :
- State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0),
+ IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
+ State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
TmpReg(0), TmpInteger(0), isPlus(true) {}
unsigned getBaseReg() { return BaseReg; }
@@ -890,7 +879,47 @@ public:
}
};
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
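CreateMemForInlineAsm centralizes the inline-asm special cases that ParseIntelMemOperand previously open-coded. Illustrative MSVC-style inputs and the operands they yield (identifiers invented here; compiles only as 32-bit MSVC inline asm):

  int Var;            // LookupInlineAsmIdentifier sets IsVarDecl = true
  void Fn();          // a function: IsVarDecl stays false
  void demo() {
    __asm mov eax, Fn   // -> CreateReg(EBX/RBX, AddressOf=true): 'r' constraint
    __asm mov eax, Var  // -> CreateMem(..., BaseReg=1, ...), plus an
                        //    AOK_SizeDirective rewrite when a size is inferred
  }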
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ uint64_t ImmDisp,
unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
@@ -902,7 +931,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned TmpReg = 0;
- // Try to handle '[' 'symbol' ']'
+ // Try to handle '[' 'Symbol' ']'
if (getLexer().is(AsmToken::Identifier)) {
if (ParseRegister(TmpReg, Start, End)) {
const MCExpr *Disp;
@@ -911,16 +940,27 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().isNot(AsmToken::RBrac))
return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
+
+ // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
+ if (ImmDisp)
+ return ErrorOperand(Start, "Unsupported immediate displacement!");
+
// Adjust the EndLoc due to the ']'.
End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
}
}
- // Parse [ BaseReg + Scale*IndexReg + Disp ].
+ // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
+ // immediate displacement before the bracketed expression.
bool Done = false;
- IntelBracExprStateMachine SM(Parser);
+ IntelBracExprStateMachine SM(Parser, ImmDisp);
// If we parsed a register, then the end loc has already been set and
// the identifier has already been lexed. We also need to update the
@@ -1007,7 +1047,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
}
/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
+ uint64_t ImmDisp,
+ SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -1019,8 +1061,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex();
}
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::Integer)) {
+ const AsmToken &IntTok = Parser.getTok();
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+ IntTok.getLoc()));
+ uint64_t ImmDisp = IntTok.getIntVal();
+ Parser.Lex(); // Eat the integer.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ }
+
if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
if (!ParseRegister(SegReg, Start, End)) {
// Handle SegReg : [ ... ]
@@ -1029,47 +1084,16 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex(); // Eat :
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
}
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getParser().parseExpression(Disp, End))
return 0;
- bool NeedSizeDir = false;
- bool IsVarDecl = false;
- if (isParsingInlineAsm()) {
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- unsigned tLength, tSize, tType;
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
- tSize, tType, IsVarDecl);
- if (!Size)
- Size = tType * 8; // Size is in terms of bits in this context.
- NeedSizeDir = Size > 0;
- }
- }
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
- }
-
- // When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size, NeedSizeDir);
- }
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
}
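The ImmDisp parameter exists so that a displacement written before the brackets seeds the state machine's running displacement instead of being lost. Operand spellings the rewritten parser now covers (examples invented for illustration):

  const char *const IntelOperands[] = {
    "mov eax, [ebx + 4*ecx + 8]", // displacement inside the brackets
    "mov eax, 8[ebx + 4*ecx]",    // ImmDisp before '[', folded into Disp
    "mov eax, es:[ebx]",          // SegReg ':' '[' ... ']'
  };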
/// Parse the '.' operator.
@@ -1197,7 +1221,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
- return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+ return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
@@ -1220,8 +1244,24 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
+ bool isInteger = getLexer().is(AsmToken::Integer);
if (!getParser().parseExpression(Val, End)) {
- return X86Operand::CreateImm(Val, Start, End);
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ // Immediate.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return X86Operand::CreateImm(Val, Start, End);
+
+ // Only positive immediates are valid.
+ if (!isInteger) {
+ Error(Parser.getTok().getLoc(), "expected a positive immediate "
+ "displacement before bracketed expr.");
+ return 0;
+ }
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
+ return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
}
}
@@ -1234,11 +1274,11 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return X86Operand::CreateReg(RegNo, Start, End);
getParser().Lex(); // Eat the colon.
- return ParseIntelMemOperand(RegNo, Start);
+ return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
}
// Memory operand.
- return ParseIntelMemOperand(0, Start);
+ return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1262,7 +1302,6 @@ X86Operand *X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -1734,242 +1773,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
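Each table entry has the same effect as one of the deleted case blocks. A sketch of a single rewrite, assuming the usual MC headers are in scope:

  MCInst Inst;
  Inst.setOpcode(X86::AND16i16);
  Inst.addOperand(MCOperand::CreateImm(0x12)); // isImmSExti16i8Value -> true
  convert16i16to16ri8(Inst, X86::AND16ri8);
  // Inst is now AND16ri8 AX, AX, 0x12; the CMP variants pass isCmp=true and
  // omit the tied destination register. The ADC/SBB cases are new coverage.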
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 85d8a991dd..e40edba6d6 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
const struct ContextDecision* decision = 0;
-
+
switch (type) {
case ONEBYTE:
decision = &ONEBYTE_SYM;
@@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type,
uint8_t opcode,
uint8_t modRM) {
const struct ModRMDecision* dec = 0;
-
+
switch (type) {
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
@@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type,
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
}
-
+
switch (dec->modrm_type) {
default:
debug("Corrupt table! Unknown modrm_type");
@@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
*/
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
-
+
if (!ret)
++(insn->readerCursor);
-
+
return ret;
}
@@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t)
*/
static void dbgprintf(struct InternalInstruction* insn,
const char* format,
- ...) {
+ ...) {
char buffer[256];
va_list ap;
-
+
if (!insn->dlog)
return;
-
+
va_start(ap, format);
(void)vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
-
+
insn->dlog(insn->dlogArg, buffer);
-
+
return;
}
@@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
-
+
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
-
+
dbgprintf(insn, "readPrefixes()");
-
+
while (isPrefix) {
prefixLocation = insn->readerCursor;
-
+
if (consumeByte(insn, &byte))
return -1;
/*
- * If the first byte is a LOCK prefix break and let it be disassembled
- * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
- * FIXME there is currently no way to get the disassembler to print the
- * lock prefix if it is not the first byte.
+ * If the byte is a LOCK/REP/REPNE prefix and not part of the opcode, then
+ * break and let it be disassembled as a normal "instruction".
*/
- if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
- break;
-
+ if (insn->readerCursor - 1 == insn->startLocation
+ && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
+ uint8_t nextByte;
+ if (byte == 0xf0)
+ break;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
+ if (consumeByte(insn, &nextByte))
+ return -1;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ unconsumeByte(insn);
+ }
+ if (nextByte != 0x0f && nextByte != 0x90)
+ break;
+ }
+
switch (byte) {
case 0xf0: /* LOCK */
case 0xf2: /* REPNE/REPNZ */
@@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
isPrefix = FALSE;
break;
}
-
+
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
-
+
insn->vexSize = 0;
-
+
if (byte == 0xc4) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
@@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
-
+
if (insn->vexSize == 3) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (wFromVEX3of3(insn->vexPrefix[2]) << 3)
| (rFromVEX2of3(insn->vexPrefix[1]) << 2)
| (xFromVEX2of3(insn->vexPrefix[1]) << 1)
| (bFromVEX2of3(insn->vexPrefix[1]) << 0);
}
-
+
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
}
}
else if (byte == 0xc5) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 2;
}
else {
unconsumeByte(insn);
}
-
+
if (insn->vexSize == 2) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (rFromVEX2of2(insn->vexPrefix[1]) << 2);
}
-
+
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
}
}
@@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
-
+
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
-
+
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
+
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->immediateSize = (hasOpSize ? 2 : 4);
}
}
-
+
return 0;
}
@@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
* @param insn - The instruction whose opcode is to be read.
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
*/
-static int readOpcode(struct InternalInstruction* insn) {
+static int readOpcode(struct InternalInstruction* insn) {
/* Determine the length of the primary opcode */
-
+
uint8_t current;
-
+
dbgprintf(insn, "readOpcode()");
-
+
insn->opcodeType = ONEBYTE;
-
+
if (insn->vexSize == 3)
{
switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
- return -1;
+ return -1;
case 0:
break;
case VEX_LOB_0F:
@@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F3A:
+ case VEX_LOB_0F3A:
insn->twoByteEscape = 0x0f;
insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
@@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
-
+
insn->twoByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_3A;
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A6;
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
-
+
insn->opcodeType = TWOBYTE;
}
}
-
+
/*
* At this point we have consumed the full opcode.
* Anything we consume from here on must be unconsumed.
*/
-
+
insn->opcode = current;
-
+
return 0;
}
@@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID,
struct InternalInstruction* insn,
uint8_t attrMask) {
BOOL hasModRMExtension;
-
+
uint8_t instructionClass;
instructionClass = contextForAttrs(attrMask);
-
+
hasModRMExtension = modRMRequired(insn->opcodeType,
instructionClass,
insn->opcode);
-
+
if (hasModRMExtension) {
if (readModRM(insn))
return -1;
-
+
*instructionID = decode(insn->opcodeType,
instructionClass,
insn->opcode,
@@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode,
0);
}
-
+
return 0;
}
@@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
*/
static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
-
+
for (i = 0;; i++) {
if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
@@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
}
/*
- * getID - Determines the ID of an instruction, consuming the ModR/M byte as
- * appropriate for extended and escape opcodes. Determines the attributes and
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
*
* @param insn - The instruction whose ID is to be determined.
@@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
-
+
dbgprintf(insn, "getID()");
-
+
attrMask = ATTR_NONE;
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
+
if (insn->vexSize) {
attrMask |= ATTR_VEX;
if (insn->vexSize == 3) {
switch (ppFromVEX3of3(insn->vexPrefix[2])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexSize == 2) {
switch (ppFromVEX2of2(insn->vexPrefix[1])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX2of2(insn->vexPrefix[1]))
attrMask |= ATTR_VEXL;
}
@@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
* conservative, but in the specific case where OpSize is present but not
* in the right place we check if there's a 16-bit operation.
*/
-
+
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
const char *specName, *specWithOpSizeName;
-
+
spec = specifierForUID(instructionID);
-
+
if (getIDWithAttrMask(&instructionIDWithOpsize,
insn,
attrMask | ATTR_OPSIZE)) {
- /*
+ /*
* ModRM required with OpSize but not present; give up and return version
* without OpSize set
*/
-
+
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
-
+
specName = x86DisassemblerGetInstrName(instructionID, miiArg);
specWithOpSizeName =
x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
@@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
const struct InstructionSpecifier *specWithNewOpcode;
spec = specifierForUID(instructionID);
-
+
/* Borrow opcode from one of the other XCHGar opcodes */
insn->opcode = 0x91;
-
+
if (getIDWithAttrMask(&instructionIDWithNewOpcode,
insn,
attrMask)) {
@@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
return 0;
}
-
+
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
-
+
return 0;
}
@@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) {
SIBIndex sibIndexBase = 0;
SIBBase sibBaseBase = 0;
uint8_t index, base;
-
+
dbgprintf(insn, "readSIB()");
-
+
if (insn->consumedSIB)
return 0;
-
+
insn->consumedSIB = TRUE;
-
+
switch (insn->addressSize) {
case 2:
dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
@@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) {
if (consumeByte(insn, &insn->sib))
return -1;
-
+
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
-
+
switch (index) {
case 0x4:
insn->sibIndex = SIB_INDEX_NONE;
@@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
}
-
+
switch (scaleFromSIB(insn->sib)) {
case 0:
insn->sibScale = 1;
@@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibScale = 8;
break;
}
-
+
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
-
+
switch (base) {
case 0x5:
switch (modFromModRM(insn->modRM)) {
@@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) {
break;
case 0x1:
insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x2:
insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x3:
@@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
-
+
return 0;
}
@@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) {
* readDisplacement - Consumes the displacement of an instruction.
*
* @param insn - The instruction whose displacement is to be read.
- * @return - 0 if the displacement byte was successfully read; nonzero
+ * @return - 0 if the displacement byte was successfully read; nonzero
* otherwise.
*/
-static int readDisplacement(struct InternalInstruction* insn) {
+static int readDisplacement(struct InternalInstruction* insn) {
int8_t d8;
int16_t d16;
int32_t d32;
-
+
dbgprintf(insn, "readDisplacement()");
-
+
if (insn->consumedDisplacement)
return 0;
-
+
insn->consumedDisplacement = TRUE;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
-
+
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
insn->consumedDisplacement = FALSE;
@@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
insn->displacement = d32;
break;
}
-
+
insn->consumedDisplacement = TRUE;
return 0;
}
@@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) {
* @param insn - The instruction whose addressing information is to be read.
* @return - 0 if the information was successfully read; nonzero otherwise.
*/
-static int readModRM(struct InternalInstruction* insn) {
+static int readModRM(struct InternalInstruction* insn) {
uint8_t mod, rm, reg;
-
+
dbgprintf(insn, "readModRM()");
-
+
if (insn->consumedModRM)
return 0;
-
+
if (consumeByte(insn, &insn->modRM))
return -1;
insn->consumedModRM = TRUE;
-
+
mod = modFromModRM(insn->modRM);
rm = rmFromModRM(insn->modRM);
reg = regFromModRM(insn->modRM);
-
+
/*
* This goes by insn->registerSize to pick the correct register, which messes
* up if we're using (say) XMM or 8-bit register operands. That gets fixed in
@@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) {
insn->eaRegBase = EA_REG_RAX;
break;
}
-
+
reg |= rFromREX(insn->rexPrefix) << 3;
rm |= bFromREX(insn->rexPrefix) << 3;
-
+
insn->reg = (Reg)(insn->regBase + reg);
-
+
switch (insn->addressSize) {
case 2:
insn->eaBaseBase = EA_BASE_BX_SI;
-
+
switch (mod) {
case 0x0:
if (rm == 0x6) {
@@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) {
case 4:
case 8:
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
-
+
switch (mod) {
case 0x0:
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
switch (rm) {
case 0x4:
case 0xc: /* in case REXW.b is set */
- insn->eaBase = (insn->addressSize == 4 ?
+ insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
if (readDisplacement(insn))
@@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) {
}
break;
} /* switch (insn->addressSize) */
-
+
return 0;
}
@@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
* @return - 0 if fixup was successful; -1 if the register returned was
* invalid for its class.
*/
-static int fixupReg(struct InternalInstruction *insn,
+static int fixupReg(struct InternalInstruction *insn,
const struct OperandSpecifier *op) {
uint8_t valid;
-
+
dbgprintf(insn, "fixupReg()");
-
+
switch ((OperandEncoding)op->encoding) {
default:
debug("Expected a REG or R/M encoding in fixupReg");
@@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn,
}
break;
}
-
+
return 0;
}
/*
- * readOpcodeModifier - Reads an operand from the opcode field of an
+ * readOpcodeModifier - Reads an operand from the opcode field of an
* instruction. Handles AddRegFrm instructions.
*
* @param insn - The instruction whose opcode field is to be read.
@@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn,
*/
static int readOpcodeModifier(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcodeModifier()");
-
+
if (insn->consumedOpcodeModifier)
return 0;
-
+
insn->consumedOpcodeModifier = TRUE;
-
+
switch (insn->spec->modifierType) {
default:
debug("Unknown modifier type.");
@@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) {
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
return 0;
- }
+ }
}
/*
- * readOpcodeRegister - Reads an operand from the opcode field of an
+ * readOpcodeRegister - Reads an operand from the opcode field of an
* instruction and interprets it appropriately given the operand width.
* Handles AddRegFrm instructions.
*
@@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
if (readOpcodeModifier(insn))
return -1;
-
+
if (size == 0)
size = insn->registerSize;
-
+
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
- if (insn->rexPrefix &&
+ if (insn->rexPrefix &&
insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ (insn->opcodeRegister - MODRM_REG_AL - 4));
}
-
+
break;
case 2:
insn->opcodeRegister = (Reg)(MODRM_REG_AX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 4:
insn->opcodeRegister = (Reg)(MODRM_REG_EAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 8:
- insn->opcodeRegister = (Reg)(MODRM_REG_RAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
}
-
+
return 0;
}
@@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
uint16_t imm16;
uint32_t imm32;
uint64_t imm64;
-
+
dbgprintf(insn, "readImmediate()");
-
+
if (insn->numImmediatesConsumed == 2) {
debug("Already consumed two immediates");
return -1;
}
-
+
if (size == 0)
size = insn->immediateSize;
else
insn->immediateSize = size;
insn->immediateOffset = insn->readerCursor - insn->startLocation;
-
+
switch (size) {
case 1:
if (consumeByte(insn, &imm8))
@@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
insn->immediates[insn->numImmediatesConsumed] = imm64;
break;
}
-
+
insn->numImmediatesConsumed++;
-
+
return 0;
}
@@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
*/
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
-
+
if (insn->vexSize == 3)
insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
else if (insn->vexSize == 2)
@@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
int sawRegImm = 0;
-
+
dbgprintf(insn, "readOperands()");
/* If non-zero vvvv specified, need to make sure one of the operands
uses it. */
hasVVVV = !readVVVV(insn);
needVVVV = hasVVVV && (insn->vvvv != 0);
-
+
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
@@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) {
/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
if (needVVVV) return -1;
-
+
return 0;
}
@@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) {
* decodeInstruction - Reads and interprets a full instruction provided by the
* user.
*
- * @param insn - A pointer to the instruction to be populated. Must be
+ * @param insn - A pointer to the instruction to be populated. Must be
* pre-allocated.
* @param reader - The function to be used to read the instruction's bytes.
* @param readerArg - A generic argument to be passed to the reader to store
@@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
-
+
insn->reader = reader;
insn->readerArg = readerArg;
insn->dlog = logger;
@@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->readerCursor = startLoc;
insn->mode = mode;
insn->numImmediatesConsumed = 0;
-
+
if (readPrefixes(insn) ||
readOpcode(insn) ||
getID(insn, miiArg) ||
@@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn,
return -1;
insn->operands = &x86OperandSets[insn->spec->operands][0];
-
+
insn->length = insn->readerCursor - insn->startLocation;
-
+
dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
startLoc, insn->readerCursor, insn->length);
-
+
if (insn->length > 15)
dbgprintf(insn, "Instruction exceeds 15-byte limit");
-
+
return 0;
}
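Aside from the trailing-whitespace cleanup, the substantive change is in readPrefixes(): a leading F2/F3 is treated as a prefix of the current instruction only when the following byte (skipping a REX byte in 64-bit mode) is 0x0F or 0x90; otherwise the loop breaks and the byte is disassembled as a standalone prefix "instruction". Byte patterns it distinguishes (a hedged sketch; the opcode choices are illustrative):

  const unsigned char Pause[]   = {0xf3, 0x90};       // next is 0x90: kept
  const unsigned char Escaped[] = {0xf2, 0x0f, 0x38}; // next is 0x0f: kept
  const unsigned char Alone[]   = {0xf3, 0xa5};       // otherwise: F3 emitted
                                                      // as its own instruction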
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 9e68388cf2..3669560070 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -276,9 +276,9 @@ namespace X86II {
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
- MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
- MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
- MRM_DE = 55, MRM_DF = 56,
+ MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
+ MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
+ MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -574,16 +574,13 @@ namespace X86II {
++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
return FirstMemOp;
}
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_E8: case X86II::MRM_F0:
- case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
case X86II::MRM_DE: case X86II::MRM_DF:
return -1;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204ae75..776cee1e35 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
@@ -1117,16 +1136,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
TSFlags, CurByte, OS, Fixups);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
@@ -1143,6 +1159,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
+ case X86II::MRM_D6: MRM = 0xD6; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 0216252c19..1dcc344e7f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,8 +120,14 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
+def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
+ "Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
+def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+ "Support PRFCHW instructions">;
+def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+ "Support RDSEED instruction">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
@@ -130,6 +136,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+ "CallRegIndirect", "true",
+ "Call register indirect">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -143,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-class AtomProc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, AtomModel, Features>;
-
def : Proc<"generic", []>;
def : Proc<"i386", []>;
def : Proc<"i486", []>;
@@ -162,46 +168,61 @@ def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>;
-def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide, FeaturePadShortFunctions]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+ [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+ [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+ [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+ [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide,
+ FeatureCallRegIndirect,
+ FeaturePadShortFunctions]>;
+
// "Arrandale" along with corei3 and corei5
-def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES]>;
-def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT]>;
+def : ProcessorModel<"corei7", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"westmere", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+ FeaturePCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
- FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
- FeatureRDRAND, FeatureF16C, FeatureFSGSBase,
- FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA,
- FeatureRTM]>;
+def : ProcessorModel<"core-avx2", HaswellModel,
+ [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+ FeatureHLE]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index ac5daec2b2..6b228b0b03 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
- case X86II::MO_SECREL: O << "@SECREL"; break;
+ case X86II::MO_SECREL: O << "@SECREL32"; break;
}
}
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ece38aa346..2518e02e2a 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b5c3270065..c5da0b9b16 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -816,14 +816,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
- // and into %rax.
- if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+ // and into %rax. We also do the same with %eax for Win32.
+ if (F.hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
+ unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- X86::RAX).addReg(Reg);
- RetRegs.push_back(X86::RAX);
+ RetReg).addReg(Reg);
+ RetRegs.push_back(RetReg);
}
// Now emit the RET.
@@ -1526,6 +1528,9 @@ bool X86FastISel::FastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (Subtarget->isTargetWindows())
+ return false;
+
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
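For context, a minimal C++ illustration (not part of the patch) of what these X86FastISel hunks handle: a struct returned by value travels through a hidden sret pointer, and with this change Win32 echoes that pointer back in %eax just as the x86-64 ABI does in %rax.

#include <cstring>

struct Big { int v[16]; };

// The callee writes through the hidden sret pointer; on return that
// pointer is now also copied into %eax on Win32 (%rax on x86-64).
Big makeBig() {
  Big b;
  std::memset(b.v, 0, sizeof b.v);
  return b;
}

int main() {
  Big b = makeBig();
  return b.v[0];
}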
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 00fbe6924c..6041669f81 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -444,7 +444,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None &&
- (N->getOpcode() == X86ISD::CALL ||
+ // Only do this when the target doesn't favor register-indirect
+ // calls.
+ ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
// Only does this if load can be folded into TC_RETURN.
(Subtarget->is64Bit() ||
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 960870dc60..69341869aa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -470,7 +470,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
- // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, etc. As a result, no
// other SjLj exception interfaces are implemented and please don't build
@@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasInt256()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- }
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
@@ -1118,6 +1111,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
@@ -1186,14 +1180,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
-
setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
@@ -1210,15 +1196,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
+ }
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
- }
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1356,7 +1344,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- BenefitFromCodePlacementOpt = true;
// Predictable cmovs don't hurt on Atom because it's in-order.
PredictableSelectIsExpensive = !Subtarget->isAtom();
@@ -1679,10 +1666,11 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// The x86-64 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Win32 also requires the sret argument to be put into %eax.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
- if (Subtarget->is64Bit() &&
- DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
@@ -1690,12 +1678,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+ unsigned RetValReg
+ = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+ X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
- RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
+ RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
}
RetOps[0] = Chain; // Update chain.
@@ -2049,9 +2039,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// The x86-64 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Win32 also requires the sret argument to be put into %eax.
// Save the argument into a virtual register so that we can access it
// from the return points.
- if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ if (MF.getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
@@ -7834,7 +7826,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7854,18 +7846,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
+ // use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
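A hedged sketch of the source pattern this lowering serves, assuming a MinGW-style toolchain with GCC's __thread extension; on such targets the TLS slot array is read from fs:[0x2C] (x86) or gs:[0x58] (x64) rather than through the _tls_array symbol:

// Each access to tls_counter goes through the per-thread slot array at
// the fixed offset described above.
__thread int tls_counter;

int bump() { return ++tls_counter; }

int main() { return bump(); }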
@@ -10921,16 +10914,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- // RDRAND intrinsics.
+ // RDRAND/RDSEED intrinsics.
case Intrinsic::x86_rdrand_16:
case Intrinsic::x86_rdrand_32:
- case Intrinsic::x86_rdrand_64: {
+ case Intrinsic::x86_rdrand_64:
+ case Intrinsic::x86_rdseed_16:
+ case Intrinsic::x86_rdseed_32:
+ case Intrinsic::x86_rdseed_64: {
+ unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
+ IntNo == Intrinsic::x86_rdseed_32 ||
+ IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
+ X86ISD::RDRAND;
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
- // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise
- // return the value from Rand, which is always 0, casted to i32.
+ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
+ // Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, MVT::i32),
@@ -10943,6 +10943,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
+
+ // XTEST intrinsics.
+ case Intrinsic::x86_xtest: {
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, MVT::i8),
+ InTrans);
+ SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+ Ret, SDValue(InTrans.getNode(), 1));
+ }
}
}
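A hedged usage sketch of the intrinsics whose lowering the two hunks above add, via the immintrin.h wrappers (compile with something like -mrdrnd -mrdseed -mrtm; hardware support is assumed):

#include <immintrin.h>
#include <cstdio>

int main() {
  unsigned int r;
  if (_rdrand32_step(&r))        // X86ISD::RDRAND: CF=1 means r is valid
    std::printf("rdrand: %u\n", r);
  if (_rdseed32_step(&r))        // X86ISD::RDSEED: same CF protocol
    std::printf("rdseed: %u\n", r);
  std::printf("in transaction: %d\n", (int)_xtest());  // X86ISD::XTEST
  return 0;
}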
@@ -11490,16 +11502,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- if (!Subtarget->hasSSE2())
- return SDValue();
-
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
@@ -11610,6 +11619,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
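Taken together, LowerScalarImmediateShift and LowerScalarVariableShift above map splat shift amounts onto the two SSE2 encodings. A hedged C++ rendering with intrinsics (illustrative, not part of the patch):

#include <emmintrin.h>

// A splat-constant amount takes the immediate form (X86ISD::VSHLI, pslld $3);
// a splatted variable amount takes the form with the count in an XMM
// register (X86ISD::VSHL, pslld %xmm).
__m128i shift_imm(__m128i a) {
  return _mm_slli_epi32(a, 3);
}

__m128i shift_var(__m128i a, int n) {
  return _mm_sll_epi32(a, _mm_cvtsi32_si128(n));
}

int main() {
  __m128i a = _mm_set1_epi32(1);
  int x = _mm_cvtsi128_si32(shift_imm(a));     // 8
  int y = _mm_cvtsi128_si32(shift_var(a, 3));  // 8
  return x - y;
}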
@@ -11826,8 +12053,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
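The (sext (vzext x)) -> (vsext x) rewrite above collapses a zero-extend followed by shift-left/shift-right sign filling into a single sign-extend. A hedged SSE4.1 equivalent in intrinsics (illustrative only):

#include <smmintrin.h>

// One pmovsxwd (X86ISD::VSEXT) replaces pmovzxwd + pslld + psrad.
__m128i widen_signed(__m128i v16) {
  return _mm_cvtepi16_epi32(v16);
}

int main() {
  __m128i v = _mm_set1_epi16(-2);
  return _mm_cvtsi128_si32(widen_signed(v)) + 2;  // 0
}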
@@ -12262,7 +12504,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -12545,6 +12788,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
@@ -12553,6 +12797,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
+ case X86ISD::XTEST: return "X86ISD::XTEST";
}
}
@@ -15584,8 +15829,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// Quit if the constant is neither 0 nor 1.
return SDValue();
- // Skip 'zext' node.
- if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ // Skip 'zext' or 'trunc' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE)
SetCC = SetCC.getOperand(0);
switch (SetCC.getOpcode()) {
@@ -15604,9 +15850,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
- // A special case for rdrand, where 0 is set if false cond is found.
SDValue Op = SetCC.getOperand(0);
- if (Op.getOpcode() != X86ISD::RDRAND)
+ // Skip 'zext' or 'trunc' node.
+ if (Op.getOpcode() == ISD::ZERO_EXTEND ||
+ Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ // A special case for rdrand/rdseed, where 0 is set when the false
+ // condition is found.
+ if ((Op.getOpcode() != X86ISD::RDRAND &&
+ Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
@@ -15918,124 +16170,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
- // On X86 with SSE2 support, we can transform this to a vector shift if
- // all elements are shifted by the same amount. We can't do this in legalize
- // because the a constant vector is typically transformed to a constant pool
- // so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
- return SDValue();
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasInt256() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue ShAmtOp = N->getOperand(1);
- EVT EltVT = VT.getVectorElementType();
- DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt = SDValue();
- if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- // Handle the case where the build_vector is all undef
- // FIXME: Should DAG allow this?
- if (i == NumElts)
- return SDValue();
-
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != BaseShAmt) {
- return SDValue();
- }
- }
- } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- SDValue InVec = ShAmtOp.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
- if (C->getZExtValue() == SplatIdx)
- BaseShAmt = InVec.getOperand(1);
- }
- }
- if (BaseShAmt.getNode() == 0) {
- // Don't create instructions with illegal types after legalize
- // types has run.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
- !DCI.isBeforeLegalize())
- return SDValue();
-
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
- }
- } else
- return SDValue();
-
- // The shift amount is an i32.
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
- // The shift amount is identical so we can do a vector shift.
- SDValue ValOp = N->getOperand(0);
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- }
+ return SDValue();
}
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
@@ -16346,13 +16486,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- if (Mask.getOpcode() != X86ISD::VSRAI)
- return SDValue();
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(1);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
if ((SraAmt + 1) != EltBits)
return SDValue();
@@ -16526,11 +16672,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
+ // On Sandy Bridge, unaligned 256-bit loads are inefficient.
ISD::LoadExtType Ext = Ld->getExtensionType();
unsigned Alignment = Ld->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
-
- // On Sandybridge unaligned 256bit loads are inefficient.
+ bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
!DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
unsigned NumElems = RegVT.getVectorNumElements();
@@ -16550,7 +16695,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
- std::max(Alignment/2U, 1U));
+ std::min(16U, Alignment));
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));
@@ -16721,13 +16866,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = St->getDebugLoc();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned Alignment = St->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
StVT == VT && !IsAligned) {
unsigned NumElems = VT.getVectorNumElements();
@@ -16747,7 +16892,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
- std::max(Alignment/2U, 1U));
+ std::min(16U, Alignment));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
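A hedged manual equivalent of what the LOAD/STORE combines above emit on AVX-without-AVX2 cores: one unaligned 256-bit access is split into two 128-bit halves, and the second half can claim at most 16-byte alignment, which is the std::min(16U, Alignment) change.

#include <immintrin.h>

void store256_split(float *p, __m256 v) {
  _mm_storeu_ps(p,     _mm256_castps256_ps128(v));    // low 128 bits
  _mm_storeu_ps(p + 4, _mm256_extractf128_ps(v, 1));  // high 128 bits
}

int main() {
  float buf[9];
  store256_split(buf + 1, _mm256_set1_ps(1.0f));      // deliberately unaligned
  return (int)buf[1] - 1;                             // 0
}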
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index da1dad0f40..5725f7aea5 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -356,10 +356,17 @@ namespace llvm {
// RDRAND - Get a random integer and indicate whether it is valid in CF.
RDRAND,
+ // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // indicate whether it is valid in CF.
+ RDSEED,
+
// PCMP*STRI
PCMPISTRI,
PCMPESTRI,
+ // XTEST - Test if in transactional execution.
+ XTEST,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index bb362f5c7b..ba1aede3c1 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -84,13 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
-def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
-def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
- "prefetch\t$addr", []>;
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
+ "prefetch\t$addr",
+ [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>;
-def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
- "prefetchw\t$addr", []>;
+def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB,
+ Requires<[HasPrefetchW]>;
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
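A hedged illustration of the new patterns: the generic prefetch intrinsic now selects these instructions, for example through the GCC/Clang __builtin_prefetch builtin. A write prefetch with high locality matches the (i32 1), (i32 3) operands of PREFETCHW; a read prefetch (rw = 0, any locality) matches PREFETCH.

void warm(const char *p, char *q) {
  __builtin_prefetch(p, 0, 1);  // read prefetch, PREFETCH pattern
  __builtin_prefetch(q, 1, 3);  // write prefetch, PREFETCHW under HasPrefetchW
}

int main() {
  char a = 0, b = 0;
  warm(&a, &b);
  return (int)(a + b);
}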
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d86a4065a7..225e9720da 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -36,41 +36,52 @@ let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for instructions that load one value and get the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before the
+// other (register) uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
} // hasSideEffects = 1
@@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
@@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -532,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []> {
+ mnemonic, operands, []>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
@@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", [], IIC_BIN_NONMEM>;
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
- IIC_BIN_NONMEM>;
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1241,12 +1294,12 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>;
}
}
@@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in {
// ADCX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize;
@@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adcx{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8, OpSize;
@@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
// ADOX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS;
@@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adox{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8XS;
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 8f2d0a1aae..a967a4da5c 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,7 +16,7 @@
// SetCC instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1 in {
+ isCommutable = 1, SchedRW = [WriteALU] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
IIC_CMOV32_RR>, TB;
}
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
- IIC_SET_R>, TB;
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)],
- IIC_SET_M>, TB;
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
} // Uses = [EFLAGS]
}
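// Note (an assumption about the model, inferred from how this patch pairs the
// classes): WriteALULd describes an ALU op with a folded load, and ReadAfterLd
// marks the tied register source as read only after the load completes, which
// is why the rm forms above list both. A hypothetical CMOV-like pair:
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
def FOOCMOVrr : I<0x40, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                  "foocmov{l}\t{$src2, $dst|$dst, $src2}", [], IIC_CMOV32_RR>,
                  TB, Sched<[WriteALU]>;
def FOOCMOVrm : I<0x40, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
                  "foocmov{l}\t{$src2, $dst|$dst, $src2}", [], IIC_CMOV32_RM>,
                  TB, Sched<[WriteALULd, ReadAfterLd]>;
}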
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 734e5982b2..d9ff0c63c5 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
+let SchedRW = [WriteSystem] in {
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)], IIC_RET>;
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)], IIC_RET>;
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}
}
+} // SchedRW
let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
@@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +231,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +248,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)],
- IIC_ALU_NONMEM>;
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
@@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
+let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
@@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -594,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"or{l}\t{$zero, $dst|$dst, $zero}",
- [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
+ Sched<[WriteALULd, WriteRMW]>;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
- [(X86MemBarrier)]>;
+ [(X86MemBarrier)]>, Sched<[WriteLoad]>;
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
@@ -607,7 +613,8 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
@@ -694,7 +701,8 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
!strconcat(mnemonic, "{b}\t$dst"),
@@ -728,7 +736,7 @@ let isCodeGenOnly = 1 in {
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag,
InstrItinClass itin8, InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
@@ -748,14 +756,15 @@ let isCodeGenOnly = 1 in {
}
}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
X86cas8, i64mem,
IIC_CMPX_LOCK_8B>;
}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem,
IIC_CMPX_LOCK_16B>, REX_W;
@@ -768,7 +777,8 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag,
InstrItinClass itin8, InstrItinClass itin> {
- let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -990,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
// Direct PC relative function call for small code model. 32-bit displacement
@@ -1192,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
+// Try this before selecting to OR.
+let AddedComplexity = 5, SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1239,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo,
[(set GR64:$dst, (or_is_add GR64:$src1,
i64immSExt32:$src2))]>;
}
-} // AddedComplexity
+} // AddedComplexity, SchedRW
//===----------------------------------------------------------------------===//
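// Sketch (hypothetical LOCK_FOO def, mirroring the LOCK_Arith* multiclasses
// above): locked read-modify-write instructions carry both a folded-load ALU
// write and an RMW store-back write.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
    SchedRW = [WriteALULd, WriteRMW] in
def LOCK_FOO32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                     "foo{l}\t{$src, $dst|$dst, $src}", [],
                     IIC_ALU_MEM>, LOCK;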
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index bfe954114c..0e696513d4 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -20,7 +20,7 @@
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
+ hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
@@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
}
// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
@@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
}
// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
IIC_Jcc>;
@@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
@@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in {
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>,
+ Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In32BitMode]>, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
+ Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, Sched<[WriteJump]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJump]>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
}
// Loop instructions
-
+let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
+}
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -152,27 +162,32 @@ let isCall = 1 in
let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
- "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
+ "call{l}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
- "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
- Requires<[In32BitMode]>;
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In32BitMode,FavorMemIndirectCall]>,
+ Sched<[WriteJumpLd]>;
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>, OpSize;
+ IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>;
+ IIC_CALL_FAR_PTR>, Sched<[WriteJump]>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
// callw for 16 bit code for the assembler.
let isAsmParserOnly = 1 in
@@ -185,7 +200,7 @@ let isCall = 1 in
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>;
@@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
-let isCall = 1, Uses = [RSP] in {
+let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
@@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in {
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
IIC_CALL_MEM>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode,FavorMemIndirectCall]>;
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
"lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
@@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", [], IIC_CALL_RI>,
- Requires<[IsWin64]>;
+ Requires<[IsWin64]>, Sched<[WriteJump]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Uses = [RSP],
- usesCustomInserter = 1 in {
+ isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
+ SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
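// The predicate gating CALL32m/CALL64m above is defined later in this patch
// (lib/Target/X86/X86InstrInfo.td):
//   def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
// On subtargets where callRegIndirect() is set (Atom), the memory-indirect
// call patterns stop matching, so such calls are selected as a load plus
// CALL32r/CALL64r instead.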
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded2..6dc7175357 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// FIXME: These should be Pat patterns.
let isCodeGenOnly = 1 in {
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB;
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>;
+ IIC_MOVZX>, Sched<[WriteALULd]>;
}
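// Worked example of the implicit zero-extension relied on above (sketch):
//   movl %esi, %eax    # writes EAX; the upper 32 bits of RAX become zero
// so no movzlq instruction is needed: MOVZX64rr32 is an ordinary 32-bit MOV
// re-typed to define a GR64, kept only for the cases where the zeroed upper
// bits must be guaranteed explicitly.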
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 568726e08e..2224a08d59 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -422,7 +422,7 @@ def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
}
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
IIC_FLD>;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
@@ -436,7 +436,7 @@ def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
IIC_FILD>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
IIC_FST>;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
@@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
IIC_FST>;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
@@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
}
// FP Stack manipulation instructions.
+let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
IIC_FLD>, D9;
def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
@@ -499,6 +500,7 @@ def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
IIC_FST>, DD;
def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
IIC_FXCH>, D9;
+}
// Floating point constant loads.
let isReMaterializable = 1 in {
@@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
+let SchedRW = [WriteZero] in {
def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
-
+}
// Floating point compares.
+let SchedRW = [WriteFAdd] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // SchedRW
} // Defs = [FPSW]
+let SchedRW = [WriteFAdd] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
@@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
"fcompi\t$reg", IIC_FCOMI>, DF;
}
+} // SchedRW
// Floating point flag ops.
+let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
(outs), (ins), "fnstsw %ax",
@@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
-
+} // SchedRW
let mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
- (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>;
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>,
+ Sched<[WriteLoad]>;
// FPU control instructions
+let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in
def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg", IIC_FFREE>, DD;
-
// Clear exceptions
let Defs = [FPSW] in
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
+} // SchedRW
// Operandless floating-point instructions for the disassembler.
+let SchedRW = [WriteMicrocoded] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
@@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
Requires<[In64BitMode]>;
+} // SchedRW
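// Note on the classes chosen in this file (an assumption, inferred from the
// patch): the x87 compares reuse WriteFAdd, presumably because their latency
// tracks the FP adder on the modeled machines, while control ops with no
// single execution unit (fninit, fnclex, wait, the fxsave/fxrstor family)
// fall back to WriteMicrocoded.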
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 44e574d246..0ef9491eb7 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -45,14 +45,15 @@ def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
def MRM_D4 : Format<47>;
def MRM_D5 : Format<48>;
-def MRM_D8 : Format<49>;
-def MRM_D9 : Format<50>;
-def MRM_DA : Format<51>;
-def MRM_DB : Format<52>;
-def MRM_DC : Format<53>;
-def MRM_DD : Format<54>;
-def MRM_DE : Format<55>;
-def MRM_DF : Format<56>;
+def MRM_D6 : Format<49>;
+def MRM_D8 : Format<50>;
+def MRM_D9 : Format<51>;
+def MRM_DA : Format<52>;
+def MRM_DB : Format<53>;
+def MRM_DC : Format<54>;
+def MRM_DD : Format<55>;
+def MRM_DE : Format<56>;
+def MRM_DF : Format<57>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -208,47 +209,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern>
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -257,12 +258,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, [], itin> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
let FPForm = fp;
let Pattern = pattern;
@@ -275,14 +276,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -292,7 +293,7 @@ def __xs : XS;
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -303,7 +304,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SIi8 - SSE 1 & 2 scalar instructions
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -350,25 +351,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// VPSI - SSE1 instructions with TB prefix in AVX form.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
@@ -388,42 +389,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
@@ -433,15 +434,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE3]>;
@@ -458,19 +459,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
@@ -480,11 +481,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE41]>;
@@ -492,19 +493,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE42]>;
@@ -514,11 +515,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX8I - AVX instructions with T8 and OpSize prefix.
// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX]>;
@@ -528,11 +529,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX28I - AVX2 instructions with T8 and OpSize prefix.
// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX2]>;
@@ -541,53 +542,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, Requires<[HasPCLMUL]>;
class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
- OpSize, VEX_4V, Requires<[HasFMA]>;
+ OpSize, VEX_4V, FMASC, Requires<[HasFMA]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
- OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+ OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP9, Requires<[HasXOP]>;
// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
@@ -595,33 +596,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
let Pattern = pattern;
let CodeSize = 3;
}
class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
// MMX Instruction templates
@@ -635,23 +636,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
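// Note: every template's default itinerary switches from IIC_DEFAULT to
// NoItinerary, so a def that omits the argument no longer claims a dummy
// itinerary and is presumably costed purely by its SchedRW classes (sketch,
// hypothetical opcode/mnemonic):
def FOONOP : I<0x90, RawFrm, (outs), (ins), "foonop", []>, Sched<[WriteZero]>;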
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 17714acd86..7ba542c875 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3655,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
bool isTwoAddrFold = false;
+
+ // Atom favors register form of call. So, we do not fold loads into calls
+ // when X86Subtarget is Atom.
+ if (isCallRegIndirect &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+ return NULL;
+ }
+
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
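// Taken together with the FavorMemIndirectCall predicate added to the call
// patterns, this guard covers both ways a load could end up folded into an
// indirect call on Atom: instruction selection (the CALL*m patterns no longer
// match) and this post-isel memory-operand folding (which now returns NULL
// for CALL32r/CALL64r).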
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d989ec7bb0..ccc1aa2e35 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -142,6 +142,9 @@ def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -603,7 +606,12 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasHLE : Predicate<"Subtarget->hasHLE()">;
+def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -626,6 +634,7 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -758,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
//
// Nop
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
"nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
@@ -769,8 +778,9 @@ let neverHasSideEffects = 1 in {
// Constructing a stack frame.
def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", [], IIC_ENTER>;
+ "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
+let SchedRW = [WriteALU] in {
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
@@ -780,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize;
def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -803,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -832,29 +843,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
-}
+} // mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>;
def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>;
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -865,23 +877,24 @@ def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>, Sched<[WriteLoad]>;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>, Sched<[WriteStore]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad=1, neverHasSideEffects=1 in {
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore=1, neverHasSideEffects=1 in {
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
-let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -890,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm,
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ Sched<[WriteShift]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShift]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteShift]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
IIC_BSR>, TB,
- OpSize;
+ OpSize, Sched<[WriteShiftLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]
-
+let SchedRW = [WriteMicrocoded] in {
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
@@ -971,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -987,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -1004,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
+} // SchedRW
+let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1017,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
/// absolute 32-bit memory offset (not a PC-relative displacement). These are
/// only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1038,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
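// Encoding illustration for the moffs forms above (hedged; opcode bytes
// recalled from the Intel opcode map): the moffs variant drops the ModRM
// byte and embeds the absolute address directly after the opcode:
//
//   A0 78 56 34 12       mov al, byte ptr [0x12345678]   ; moffs form (MOV8o8a)
//   8A 05 78 56 34 12    mov al, byte ptr [0x12345678]   ; equivalent ModRM form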
// FIXME: These definitions are utterly broken
// Just leave them commented out for now because they're useless outside
@@ -1055,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1066,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1081,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
[(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
+let SchedRW = [WriteStore] in {
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1093,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1101,34 +1125,37 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
}
// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1139,13 +1166,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
@@ -1166,6 +1194,7 @@ let mayLoad = 1, hasSideEffects = 0 in {
>, TB;
}
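// Illustration of the quirk described above (sketch, not from the patch):
// with a register destination the bit index is reduced modulo the operand
// width, but with a memory destination the CPU first addresses the byte at
// ea + (index >> 3), so the high index bits change which memory is touched:
//
//   bt %eax, %ebx      ; tests bit (EAX mod 32) of EBX
//   bt %eax, (%edi)    ; tests bit (EAX & 7) of the byte at EDI + (EAX >> 3)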
+let SchedRW = [WriteALU] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1178,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
IIC_BT_RI>, TB;
+} // SchedRW
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1194,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
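// Hypothetical use of the FastBTMem predicate defined earlier (illustration
// only, not part of this patch): a pattern folding a load into bt would be
// gated on it, since only the register-index memory form can be slow:
//
//   let Predicates = [FastBTMem] in
//   def : Pat<(X86bt (loadi32 addr:$src1), GR32:$src2),
//             (BT32mr addr:$src1, GR32:$src2)>;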
let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1203,8 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1214,6 +1248,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1221,8 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1232,6 +1268,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1239,8 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1250,6 +1288,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1257,8 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1268,6 +1308,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1275,8 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1286,6 +1328,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1293,8 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1315,7 +1359,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
// operand is referenced, the atomicity is ensured.
multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
InstrItinClass itin> {
- let Constraints = "$val = $dst" in {
+ let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
(ins GR8:$val, i8mem:$ptr),
!strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
@@ -1350,6 +1394,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
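// Rough expansion of the defm above (hedged paraphrase of what TableGen
// generates for the 8-bit width; the wider forms follow the same shape):
//
//   def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst),
//                   (ins GR8:$val, i8mem:$ptr),
//                   "xchg{b}\t{$val, $ptr|$ptr, $val}",
//                   [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))],
//                   IIC_XCHG_MEM>;
//
// With this patch each such def also inherits SchedRW = [WriteALULd,
// WriteRMW] from the let wrapping the multiclass body.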
// Swap between registers.
+let SchedRW = [WriteALU] in {
let Constraints = "$val = $dst" in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
@@ -1374,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
+} // SchedRW
-
-
+let SchedRW = [WriteALU] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1386,8 +1431,9 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
@@ -1400,6 +1446,7 @@ def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
+let SchedRW = [WriteALU] in {
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG8>, TB;
@@ -1412,7 +1459,9 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG>, TB;
+} // SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
@@ -1436,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
TB, Requires<[HasCmpxchg16b]>;
-
+} // SchedRW
// Lock instruction prefix
@@ -1459,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
// String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
+}
+let SchedRW = [WriteSystem] in {
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
-
+}
// Flag instructions
+let SchedRW = [WriteALU] in {
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
@@ -1479,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
// Table lookup instructions
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
+ Sched<[WriteLoad]>;
+let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1512,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[In32BitMode]>;
+} // SchedRW
+let SchedRW = [WriteSystem] in {
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1528,11 +1586,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1545,6 +1605,8 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
T8;
+ }
+ let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1557,6 +1619,7 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
T8;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1575,6 +1638,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
}
//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS] in {
+ def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins),
+ "rdseed{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB;
+ def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+ "rdseed{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB;
+ def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins),
+ "rdseed{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
+}
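// RDSEED reports success through CF (CF=1 means $dst holds a fresh seed),
// which is why EFLAGS is in Defs and X86rdseed yields a flags result. The
// canonical retry loop looks like (illustration, not from the patch):
//
//   retry:
//     rdseed %eax
//     jnc retry        ; CF=0 -> no entropy available yet, try again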
+
+//===----------------------------------------------------------------------===//
// LZCNT Instruction
//
let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 127af6f7f9..49721df7c1 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -20,6 +20,7 @@
// MMX Multiclasses
//===----------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def MMX_INTALU_ITINS : OpndItins<
IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
>;
@@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins<
def MMX_PHADDSUBD : OpndItins<
IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
>;
+}
+let Sched = WriteVecIMul in
def MMX_PMUL_ITINS : OpndItins<
IIC_MMX_PMUL, IIC_MMX_PMUL
>;
+let Sched = WriteVecALU in {
def MMX_PSADBW_ITINS : OpndItins<
IIC_MMX_PSADBW, IIC_MMX_PSADBW
>;
@@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins<
def MMX_MISC_FUNC_ITINS : OpndItins<
IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
>;
+}
def MMX_SHIFT_ITINS : ShiftOpndItins<
IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
>;
+let Sched = WriteShuffle in {
def MMX_UNPCK_H_ITINS : OpndItins<
IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
>;
@@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins<
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
+} // Sched
+let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
>;
@@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins<
def MMX_CVT_PS_ITINS : OpndItins<
IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
>;
+}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
@@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> {
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
@@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
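// Usage sketch (hedged; mirrors the arithmetic defms later in this file): a
// commutable MMX add instantiated through the multiclass above now carries
// the Sched info of MMX_INTALU_ITINS alongside its itineraries:
//
//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
//                                      MMX_INTALU_ITINS, 1>;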
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[WriteVecShift]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>;
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
}
@@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (memopmmx addr:$src))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
@@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
- (bitconvert (memopmmx addr:$src2))))], itins.rm>;
+ (bitconvert (memopmmx addr:$src2))))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>,
+ Sched<[itins.Sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
PatFrag ld_frag, string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
}
//===----------------------------------------------------------------------===//
@@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
+ Sched<[WriteStore]>;
// Low word of MMX to GPR.
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
@@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
- (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
+ (MMX_X86movd2w (x86mmx VR64:$src)))],
+ IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- [], IIC_MMX_MOV_MM_RM>;
+ [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
// These are 64-bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
+let SchedRW = [WriteMove] in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
+} // SchedRW
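// Concretely (illustration, not from the patch): MMX_MOVD64from64rr prints
//   movd %mm0, %rax
// even though the operation is a full 64-bit movq, purely so the Darwin
// assembler of the era would accept the text.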
+
+let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
+} // SchedRW
+let SchedRW = [WriteMove] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
@@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
+} // SchedRW
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
- IIC_MMX_MOVQ_RM>;
+ IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
let AddedComplexity = 15 in
// movd to MMX register zero-extends
@@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
@@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
@@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))],
- IIC_MMX_PEXTR>;
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
@@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32:$src2, (iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
(iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Mask creation
@@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
+let SchedRW = [WriteShuffle] in {
let Uses = [EDI] in
def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
@@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
IIC_MMX_MASKMOV>;
+}
// 64-bit bit convert.
let Predicates = [HasSSE2] in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0979752757..384238741b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
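// How the new field is consumed (hedged sketch of the pattern this patch
// applies throughout these files): an itinerary bundle is tagged once at
// its definition,
//
//   let Sched = WriteFAdd in
//   def SSE_ALU_F32S : OpndItins<IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM>;
//
// and each multiclass built on it forwards the tag, using the folded-load
// variant (a member of X86FoldableSchedWrite) plus ReadAfterLd for the
// memory form:
//
//   def rr : ..., Sched<[itins.Sched]>;
//   def rm : ..., Sched<[itins.Sched.Folded, ReadAfterLd]>;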
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
// scalar
+let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
+}
def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM
>;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM
>;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
>;
// parallel
+let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
def SSE_ALU_F64P : OpndItins<
IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
+}
def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM
>;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM
>;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
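// ReadAfterLd (hedged reading of the X86 scheduling defs): a SchedRead that
// per-CPU models advance by roughly the load latency, e.g.
//
//   def : ReadAdvance<ReadAfterLd, 5>;   // the 5 is illustrative only
//
// telling the scheduler that in the rm form above $src1 is not read until
// the load of $src2 completes, so the producer of $src1 may finish
// correspondingly later without stalling this instruction.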
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>;
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, NoItinerary, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
+ pat_rm, NoItinerary, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -345,7 +370,7 @@ let Predicates = [HasAVX] in {
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
@@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX] in {
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
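// WriteZero models these zero idioms as consuming no execution resources;
// a per-CPU scheduler would define it roughly as (hedged sketch, modeled
// on the X86 scheduler files):
//
//   def : WriteRes<WriteZero, []> { let Latency = 0; }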
@@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
@@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
// For the disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
- [], IIC_SSE_MOV_S_RR>;
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- VEX, VEX_LIG;
+ VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
}
// Loading from memory automatically zeroing upper bits.
@@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
@@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in {
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>, VEX;
@@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
@@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
[(set VR128:$dst,
(psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
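// Illustration of the lowering the comment above refers to (sketch, not
// from the patch): extracting element 1 of a v2f64 becomes an unpack-high
// followed by an element-0 extract, so
//   %e = extractelement <2 x double> %v, i32 1
// selects roughly to
//   unpckhpd %xmm0, %xmm0   ; move the high half to the low half
// and the store form folds directly into movhpd, as the patterns above show.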
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
@@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}
@@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
@@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))],
- IIC_SSE_CVT_Scalar_RR>;
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert scalar single to scalar double
@@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f64 (fextend FR32:$src)),
@@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[UseSSE2]>;
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
// Convert Packed Double FP to Packed DW Integers
@@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- VEX;
+ VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
@@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in {
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB;
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB;
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX;
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
}
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;
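// The WriteCvt* classes used throughout this conversion section come in the
// same register/folded-load pairs as the other writes. In X86Schedule.td
// they are declared roughly as follows (each defm also generates the
// corresponding WriteCvt*Ld variant):
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float width change.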
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
@@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
// AVX 256-bit register conversion intrinsics
@@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RR>;
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RM>;
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
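// The comparison instructions above simply reuse WriteFAdd; this file only
// chooses abstract SchedWrite classes, and each per-CPU scheduling model
// binds them to concrete resources and latencies. A sketch of the consumer
// side, loosely following X86SchedSandyBridge.td (the port name and cycle
// counts here are illustrative, not authoritative):
let SchedModel = SandyBridgeModel in {
// WriteFAdd executes on port 1 with a 3-cycle latency.
def : WriteRes<WriteFAdd, [SBPort1]> { let Latency = 3; }
// ReadAfterLd becomes a ReadAdvance: a folded load has already been in
// flight for several cycles by the time the register operand is read.
def : ReadAdvance<ReadAfterLd, 5>;
}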
let Defs = [EFLAGS] in {
@@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
@@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L;
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L;
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
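// These two let blocks are what make the Sched<[itins.Sched]> and
// Sched<[itins.Sched.Folded]> references in the surrounding multiclasses
// resolve: OpndItins now carries an X86FoldableSchedWrite field (defaulting
// to WriteFAdd) next to its two itinerary classes, and a region-wide
// `let Sched = WriteFSqrt in { ... }` stamps that write into every OpndItins
// def it encloses. The supporting definitions live in X86Schedule.td,
// roughly as follows (a sketch, not the verbatim source):
class X86FoldableSchedWrite : SchedWrite {
  SchedWrite Folded; // the write to use when a load is folded in
}
multiclass X86SchedWritePair {
  def Ld : SchedWrite;               // register-memory form
  def NAME : X86FoldableSchedWrite { // register-register form
    let Folded = !cast<SchedWrite>(NAME#"Ld");
  }
}
// So WriteFSqrt/WriteFSqrtLd and WriteFRcp/WriteFRcpLd are linked pairs, and
// one OpndItins argument is enough to annotate both forms of an instruction.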
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
@@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
let Constraints = "$src1 = $dst" in {
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
+ [], itins.rr>, Sched<[itins.Sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int VR256:$src))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR64:$src1, FR64:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1,f64mem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
// Square root.
@@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
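// Two spellings of the same mechanism appear in this patch: Sched<[...]> is
// a mix-in class that sets an instruction's SchedRW field one def at a time,
// while `let SchedRW = [...] in { ... }` sets the field for every def in a
// region, which is why the re-indented block above needs no per-def
// annotations. Equivalent forms, sketched with a placeholder def:
let SchedRW = [WriteStore] in
def BARmr : I<0x00, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
              "bar\t{$src, $dst|$dst, $src}", []>;
// ...is the same as:
//   def BARmr : I<...>, Sched<[WriteStore]>;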
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
@@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
IIC_SSE_PREFETCH>, TB;
}
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
TB, Requires<[HasSSE2]>;
+} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3432,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>;
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, VEX;
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, VEX;
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>;
+ IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>;
+ IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
@@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For Disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
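// The plain data movement instructions here fall into three default buckets:
// WriteMove for register-register forms, WriteLoad for the *rm forms, and
// WriteStore for the *mr forms, applied with region-wide lets as above.
// Unlike the paired writes, these are declared as plain SchedWrites in
// X86Schedule.td, since there is no folded variant to link (shape only):
def WriteLoad  : SchedWrite;
def WriteStore : SchedWrite;
def WriteMove  : SchedWrite;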
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
@@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
@@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
@@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX;
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
@@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L;
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
@@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
}
}
} // ExeDomain = SSEPackedInt
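// Note the asymmetry in the pshufd-style annotations above: the mi forms
// read their only vector input from memory, so they get a bare
// Sched<[WriteShuffleLd]> with no ReadAfterLd, while two-input shuffles that
// fold a load into $src2 (such as the unpacks below) pair the load write
// with ReadAfterLd for the register operand that is still read. In sketch
// form (placeholder defs; BAZ is not an instruction in this file):
def BAZmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                (ins i128mem:$src1, i8imm:$src2), "baz\t$dst", []>,
            Sched<[WriteShuffleLd]>;              // memory is the only input
def BAZrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, i128mem:$src2), "baz\t$dst", []>,
            Sched<[WriteShuffleLd, ReadAfterLd]>; // $src1 read after the load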
@@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffle]>;
// Insert
let Predicates = [HasAVX] in {
@@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} //SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
@@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX;
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
+let SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
+} // SchedRW
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))],
IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
@@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
+
+let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
+let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM and clearing the upper 64 bits. Note that there is
// a bug in the IA32 documentation: movq xmm1, xmm2 does clear the high bits.
//
+let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
+} // SchedRW
+let SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let AddedComplexity = 20 in {
let Predicates = [HasAVX] in {
@@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in {
}
// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let SchedRW = [WriteMove] in {
let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>;
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (X86Movddup
(scalar_to_vector (loadf64 addr:$src)))))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>;
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
IIC_SSE_LDDQU>;
+}
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
@@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst,
(IntId128
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
- OpSize;
+ OpSize, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (IntId256 VR256:$src))]>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
}
let Predicates = [HasAVX] in {
@@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
@@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
+}
+let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
+let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;
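
The Sched field attached to these OpndItins records is what the SSSE3 binop multiclasses below consume through itins.Sched. A minimal sketch of how one record resolves (illustrative only, not text from the patch):

    // Sketch: with itins = SSE_PMULHRSW (Sched = WriteVecIMul), a user such
    // as SS3I_binop_rm_int below resolves its scheduling annotations as
    //   itins.Sched        --> WriteVecIMul      (register-register form)
    //   itins.Sched.Folded --> WriteVecIMulLd    (folded-load form)
    // so the rm variant ends up carrying Sched<[WriteVecIMulLd, ReadAfterLd]>.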
@@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
+} // SchedRW
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -6679,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -6688,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
let Predicates = [HasAVX] in {
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 1185941d34..5b6298b541 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -15,7 +15,7 @@
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -62,9 +62,10 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", [], IIC_SR>;
} // hasSideEffects = 0
} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
let Uses = [CL] in {
@@ -118,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
[(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
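
Memory-destination shifts are thus modeled as a folded load plus a read-modify-write store: WriteShiftLd accounts for the load and the shift itself, and WriteRMW accounts for the store-back, which needs a store-data uop but no extra address computation. A minimal sketch of the per-target resources this implies, assuming the Haswell resource names defined later in this patch:

    // Sketch (Haswell names assumed): resources behind "shlb %cl, (mem)".
    def : WriteRes<WriteRMW, [HWPort4]>;                  // store-data uop only
    def : WriteRes<WriteShiftLd, [HWPort23, HWPort056]> { // load + shift uops
      let Latency = 5;                                    // 1 + 4-cycle load
    }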
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -163,9 +165,10 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
[(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -216,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
[(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -273,9 +277,10 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
[(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -330,13 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t$dst",
[(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Rotate instructions
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
@@ -405,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst"
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -458,9 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
+} // SchedRW
} // hasSideEffects = 0
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -512,8 +520,9 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -568,8 +577,9 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
[(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -620,8 +630,9 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
[(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -676,13 +687,14 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
[(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
@@ -765,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
}
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -840,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -857,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShiftLd]>;
}
}
@@ -870,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3, Sched<[WriteShift]>;
let mayLoad = 1 in
def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3,
+ Sched<[WriteShiftLd,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ ReadAfterLd]>;
}
}
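
The five ReadDefault entries are not arbitrary: the SchedReadWrite list is matched operand-by-operand against the expanded machine instruction, and an x86 memory operand expands to five operands. A sketch of the correspondence for the rm variant (bmi_shift is instantiated elsewhere, so the concrete instruction is hypothetical here):

    // Sketch, for illustration only:
    //   RC:$dst                 <- WriteShiftLd      (the single def)
    //   x86memop:$src1 expands to base, scale, index, displacement, and
    //   segment operands        <- 5 x ReadDefault   (read at issue time)
    //   RC:$src2 (shift amount) <- ReadAfterLd       (not needed until the
    //                                                 folded load completes)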
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 3caa1b538c..053417ccde 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
def : Pat<(debugtrap),
(INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
+let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
+let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
"in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
+let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
+let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// Moves to and from segment registers.
//
+let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
+let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
@@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
+let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
@@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-
+} // SchedRW
+
//===----------------------------------------------------------------------===//
// Specialized register support
+let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
+let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
+let SchedRW = [WriteSystem] in {
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
@@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in {
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index a37a8cc744..363a190aa8 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -15,6 +15,9 @@
//===----------------------------------------------------------------------===//
// TSX instructions
+def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
let usesCustomInserter = 1 in
def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
"# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
@@ -27,6 +30,10 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
+let Defs = [EFLAGS] in
+def XTEST : I<0x01, MRM_D6, (outs), (ins),
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 3af1b3e06b..a8a9fd8acc 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -407,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended but the other isn't.
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
new file mode 100644
index 0000000000..b3eb460d3c
--- /dev/null
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -0,0 +1,126 @@
+//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Haswell to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def HaswellModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and Haswell can
+ // decode 4 instructions per cycle.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 40;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = HaswellModel in {
+
+// Haswell can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, 6 and 7 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def HWPort0 : ProcResource<1>;
+def HWPort1 : ProcResource<1>;
+def HWPort2 : ProcResource<1>;
+def HWPort3 : ProcResource<1>;
+def HWPort4 : ProcResource<1>;
+def HWPort5 : ProcResource<1>;
+def HWPort6 : ProcResource<1>;
+def HWPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
+def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
+def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
+def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
+
+// Integer division is issued on port 0, but it uses the non-pipelined divider.
+def HWDivider : ProcResource<1> { let Buffered = 0; }
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [HWPort4]>;
+
+def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
+def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [HWPort0156]>;
+def : WriteRes<WriteZero, []>;
+
+defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
+defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
+defm : HWWriteResPair<WriteShift, HWPort056, 1>;
+defm : HWWriteResPair<WriteJump, HWPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [HWPort15]>;
+
+// This is quite rough; the latency depends on the dividend.
+def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
+defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
+defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
+defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
+defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
+defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
+
+// Vector integer operations.
+defm : HWWriteResPair<WriteVecShift, HWPort05, 1>;
+defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
+defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
+defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteShuffle, HWPort15, 1>;
+
+def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+} // SchedModel
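
To see how these definitions compose, here is the effective expansion of one pair, derived from the HWWriteResPair multiclass above (a sketch, not text from the patch). Combined with ReadAdvance<ReadAfterLd, 4>, the register operand of the folded form is not needed until 4 cycles after issue, so a producer's latency can hide behind the load:

    // Sketch: "defm : HWWriteResPair<WriteALU, HWPort0156, 1>;" expands to:
    def : WriteRes<WriteALU, [HWPort0156]> {
      let Latency = 1;                        // register-register form
    }
    def : WriteRes<WriteALULd, [HWPort23, HWPort0156]> {
      let Latency = 5;                        // !add(1, 4): ALU plus load
    }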
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
new file mode 100644
index 0000000000..66d78e4fc4
--- /dev/null
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -0,0 +1,122 @@
+//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Sandy Bridge to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SandyBridgeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and SB can decode 4
+ // instructions per cycle.
+ // FIXME: Identify instructions that aren't a single fused micro-op.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 30;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = SandyBridgeModel in {
+
+// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
+
+// Ports 0, 1, and 5 handle all computation.
+def SBPort0 : ProcResource<1>;
+def SBPort1 : ProcResource<1>;
+def SBPort5 : ProcResource<1>;
+
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores.
+def SBPort23 : ProcResource<2>;
+
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+def SBPort4 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
+def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
+def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
+
+// Integer division is issued on port 0, but it uses the non-pipelined divider.
+def SBDivider : ProcResource<1> { let Buffered = 0; }
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SBPort4]>;
+
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
+
+defm : SBWriteResPair<WriteALU, SBPort015, 1>;
+defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
+defm : SBWriteResPair<WriteShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteJump, SBPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [SBPort15]>;
+
+// This is quite rough; the latency depends on the dividend.
+def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
+defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
+defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
+defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
+defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
+
+// Vector integer operations.
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
+defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
+
+def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d99d085298..9fbde88b71 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,9 +7,94 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
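
Concretely, each defm below yields a linked pair of records. A sketch of what tblgen generates for the first one (illustrative, not part of the patch):

    // Sketch: "defm WriteALU : X86SchedWritePair;" is equivalent to:
    def WriteALULd : SchedWrite;            // register-memory (folded) variant
    def WriteALU : X86FoldableSchedWrite {  // register-register variant
      let Folded = WriteALULd;              // the link followed via .Folded
    }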
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+
+// FMA Scheduling helper class.
+class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody uses.
+def WriteMicrocoded : SchedWrite;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
-def IIC_DEFAULT : InstrItinClass;
def IIC_ALU_MEM : InstrItinClass;
def IIC_ALU_NONMEM : InstrItinClass;
def IIC_LEA : InstrItinClass;
@@ -484,3 +569,5 @@ def GenericModel : SchedMachineModel {
}
include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 1e5f2d6c9a..cce8f1b114 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries<
// InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
//
// Default is 1 cycle, port0 or port1
- InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0f2c008ab9..4132463ee8 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -283,6 +283,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasLZCNT = true;
ToggleFeature(X86::FeatureLZCNT);
}
+ if (IsIntel && ((ECX >> 8) & 0x1)) {
+ HasPRFCHW = true;
+ ToggleFeature(X86::FeaturePRFCHW);
+ }
if (IsAMD) {
if ((ECX >> 6) & 0x1) {
HasSSE4A = true;
@@ -310,6 +314,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasBMI = true;
ToggleFeature(X86::FeatureBMI);
}
+ if ((EBX >> 4) & 0x1) {
+ HasHLE = true;
+ ToggleFeature(X86::FeatureHLE);
+ }
if (IsIntel && ((EBX >> 5) & 0x1)) {
X86SSELevel = AVX2;
ToggleFeature(X86::FeatureAVX2);
@@ -322,6 +330,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 18) & 0x1)) {
+ HasRDSEED = true;
+ ToggleFeature(X86::FeatureRDSEED);
+ }
}
}
}
@@ -439,7 +455,10 @@ void X86Subtarget::initializeEnvironment() {
HasBMI = false;
HasBMI2 = false;
HasRTM = false;
+ HasHLE = false;
HasADX = false;
+ HasPRFCHW = false;
+ HasRDSEED = false;
IsBTMemSlow = false;
IsUAMemFast = false;
HasVectorUAMem = false;
@@ -448,6 +467,7 @@ void X86Subtarget::initializeEnvironment() {
HasSlowDivide = false;
PostRAScheduler = false;
PadShortFunctions = false;
+ CallRegIndirect = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e97da4b6f4..6fbdb1d5f0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -121,9 +121,18 @@ protected:
/// HasRTM - Processor has RTM instructions.
bool HasRTM;
+ /// HasHLE - Processor has HLE.
+ bool HasHLE;
+
/// HasADX - Processor has ADX instructions.
bool HasADX;
+ /// HasPRFCHW - Processor has PRFCHW instructions.
+ bool HasPRFCHW;
+
+ /// HasRDSEED - Processor has RDSEED instructions.
+ bool HasRDSEED;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -153,6 +162,10 @@ protected:
/// a stall when returning too early.
bool PadShortFunctions;
+ /// CallRegIndirect - True if calls with a memory operand should be
+ /// converted to a register-based indirect call.
+ bool CallRegIndirect;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -253,7 +266,10 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasRTM() const { return HasRTM; }
+ bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
+ bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
@@ -261,6 +277,7 @@ public:
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
+ bool callRegIndirect() const { return CallRegIndirect; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index be2a997b8e..2336035bea 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+ // Shifts on v4i64/v8i32 are legal on AVX2 even though we declare them
+ // as custom; that is only done to catch shifts by a scalar amount.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
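+ // LT.first is the number of legal-type operations this op legalizes
+ // into; scale the per-operation cost from the table by it.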
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
static const CostTblEntry<MVT> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
@@ -248,17 +271,40 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
+
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
};
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 019c4570d9..6b6480e4b4 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -409,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (RegInfo->requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index a75212a386..bc5109b4d4 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,4 +1,4 @@
-//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
// to the function does not create any copies of the pointer value that
// outlive the call. This more or less means that the pointer is only
// dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
// This pass is implemented as a bottom-up traversal of the call-graph.
//
//===----------------------------------------------------------------------===//
@@ -32,12 +34,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
@@ -62,14 +66,63 @@ namespace {
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+ // Utility methods used by inferPrototypeAttributes to add attributes
+ // and maintain annotation statistics.
+
+ void setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ }
+ }
+
+ // inferPrototypeAttributes - Analyze the name and prototype of the
+ // given function and set any applicable attributes. Returns true
+ // if any attributes were set and false otherwise.
+ bool inferPrototypeAttributes(Function &F);
+
+ // annotateLibraryCalls - Adds attributes to well-known standard library
+ // call declarations.
+ bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
private:
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
};
}
@@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes. Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+ return false;
+
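+ // For example, strlen is known not to throw, to only read memory, and not
+ // to capture its pointer argument, so mark the declaration accordingly.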
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strcmp:
+ case LibFunc::strspn:
+ case LibFunc::strncmp:
+ case LibFunc::strcspn:
+ case LibFunc::strcoll:
+ case LibFunc::strcasecmp:
+ case LibFunc::strncasecmp:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::scanf:
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat:
+ case LibFunc::sscanf:
+ case LibFunc::sprintf:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::mkdir:
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::rmdir:
+ case LibFunc::rewind:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rename:
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ break;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::fread:
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::fputs:
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::uname:
+ case LibFunc::unlink:
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::pread:
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vsscanf:
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ break;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+ // May throw; places call through function pointer.
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ case LibFunc::dunder_isoc99_sscanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ default:
+ // Didn't mark any attributes.
+ return false;
+ }
+
+ return true;
+}
+
+/// annotateLibraryCalls - Adds attributes to well-known standard library
+/// call declarations.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+ bool MadeChange = false;
+
+ // Check each function in turn, annotating well-known library function
+ // declarations with attributes.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F != 0 && F->isDeclaration())
+ MadeChange |= inferPrototypeAttributes(*F);
+ }
+
+ return MadeChange;
+}
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
- bool Changed = AddReadAttrs(SCC);
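+ // Annotate known library declarations first so the deduction passes
+ // below can build on the added attributes.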
+ bool Changed = annotateLibraryCalls(SCC);
+ Changed |= AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
return Changed;
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6f00..7595da08d3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -44,7 +44,7 @@ namespace {
}
void set(const APFloat& C);
-
+
void negate();
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
@@ -79,6 +79,14 @@ namespace {
bool isInt() const { return !IsFp; }
+ // If the coefficient is represented by an integer, promote it to a
+ // floating-point value.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from a *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
@@ -150,7 +158,9 @@ namespace {
typedef SmallVector<const FAddend*, 4> AddendVect;
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -159,6 +169,7 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
@@ -203,7 +214,31 @@ void FAddendCoef::set(const APFloat& C) {
IsFp = BufHasFpVal = true;
}
-void FAddendCoef::operator=(const FAddendCoef& That) {
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
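+ // Placement-new the APFloat into the in-place buffer. The APFloat
+ // constructor used here takes an unsigned value, so build the magnitude
+ // first and flip the sign for negative coefficients.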
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
if (That.isInt())
set(That.IntVal);
else
@@ -222,13 +257,13 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
return;
}
APFloat &T = getFpVal();
- T.add(APFloat(T.getSemantics(), That.IntVal), RndMode);
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
void FAddendCoef::operator-=(const FAddendCoef &That) {
@@ -243,13 +278,13 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
if (isInt()) {
const APFloat &T = That.getFpVal();
- set(T);
- getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
return;
}
APFloat &T = getFpVal();
- T.subtract(APFloat(T.getSemantics(), IntVal), RndMode);
+ T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode);
}
void FAddendCoef::operator*=(const FAddendCoef &That) {
@@ -272,11 +307,12 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
- set(APFloat(Semantic, IntVal));
+ convertToFpType(Semantic);
APFloat &F0 = getFpVal();
if (That.isInt())
- F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven);
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
else
F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
@@ -388,6 +424,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to perform the following optimization on the input instruction I. Return
+// the simplified expression if it was successful; otherwise, return 0.
+//
+// Instruction "I" is simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
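+ // If the new addend constant-folded, bail out unless the result is a
+ // normal, non-denormal value; factoring must not change such results.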
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -471,7 +579,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+ // Step 6: Try factorization as the last resort.
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -627,7 +736,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +749,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d162223a6f..2ee1278d23 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1610,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1631,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
@@ -1653,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bad46b4dab..a96e754f3d 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
}
}
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
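+/// For example, "icmp slt X, 1" becomes the sign test "icmp sle X, 0".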
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(pred);
+
+ if (RHS->isOne()) {
+ if (pred == ICmpInst::ICMP_SLT) {
+ pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (pred == ICmpInst::ICMP_SGT) {
+ pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
// isHighOnes - Return true if the constant is of the form 1+0+.
// This is the same as lowones(~X).
static bool isHighOnes(const ConstantInt *CI) {
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
- // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
- if (ArrayElementCount <= 32 ||
- (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
- Type *Ty;
- if (ArrayElementCount <= 32)
+ {
+ Type *Ty = 0;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else if (TD)
+ Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- else
- Ty = Type::getInt64Ty(Init->getContext());
- Value *V = Builder->CreateIntCast(Idx, Ty, false);
- V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
- V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
- return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+ if (Ty != 0) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
}
return 0;
@@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
break;
}
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
@@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
@@ -1333,13 +1399,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
// This enables to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
@@ -1531,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(pred, X, NegX);
}
}
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
default: break;
}
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a262d711d3..121aa1f8d7 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// If this is a non-volatile load or a cast from the same type,
// merge.
if (TI->isCast()) {
- if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
return 0;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
- FI->getOperand(0)->getType()->getVectorNumElements())
+ if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+ CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
return 0;
} else {
return 0; // unknown unary op.
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6877475b1d..623c470506 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v1";
+static const char *kAsanInitName = "__asan_init_v3";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = false,
+ AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
StringRef BlacklistFile = StringRef(),
@@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass {
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F);
- void createInitializerPoisonCalls(Module &M,
- Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
virtual bool doInitialization(Module &M);
@@ -315,7 +313,7 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = false,
+ AddressSanitizerModule(bool CheckInitOrder = true,
StringRef BlacklistFile = StringRef(),
bool ZeroBaseShadow = false)
: ModulePass(ID),
@@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass {
void initializeCallbacks(Module &M);
bool ShouldInstrumentGlobal(GlobalVariable *G);
- void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
- Value *LastAddr);
+ void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
size_t RedzoneSize() const {
return RedzoneSizeForScale(Mapping.Scale);
}
@@ -531,9 +528,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst,
kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
}
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
@@ -750,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
void AddressSanitizerModule::createInitializerPoisonCalls(
- Module &M, Value *FirstAddr, Value *LastAddr) {
+ Module &M, GlobalValue *ModuleName) {
// We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
// If that function is not present, this TU contains no globals, or they have
@@ -762,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
// Add a call to poison all external globals before the given function starts.
- IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+ Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+ IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
@@ -836,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
@@ -885,11 +886,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
@@ -897,9 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- // The addresses of the first and last dynamically initialized globals in
- // this TU. Used in initialization order checking.
- Value *FirstDynamic = 0, *LastDynamic = 0;
+ bool HasDynamicallyInitializedGlobals = false;
+
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+ // We shouldn't merge module names, as this string serves as a unique
+ // module ID at runtime.
+ ModuleName->setUnnamedAddr(false);
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
@@ -930,11 +936,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -958,15 +960,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
// Populate the first and last globals declared in this TU.
- if (CheckInitOrder && GlobalHasDynamicInitializer) {
- LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
- if (FirstDynamic == 0)
- FirstDynamic = LastDynamic;
- }
+ if (CheckInitOrder && GlobalHasDynamicInitializer)
+ HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
@@ -977,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
- if (CheckInitOrder && FirstDynamic && LastDynamic)
- createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+ if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
IRB.CreateCall2(AsanRegisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
@@ -1095,6 +1095,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
@@ -1312,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() {
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
- // This string will be parsed by the run-time (DescribeStackAddress).
+ // This string will be parsed by the run-time (DescribeAddressIfStack).
SmallString<2048> StackDescriptionStorage;
raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+ StackDescription << AllocaVec.size() << " ";
// Insert poison calls for lifetime intrinsics for alloca.
bool HavePoisonedAllocas = false;
@@ -1348,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() {
}
assert(Pos == LocalStackSize);
- // Write the Magic value and the frame description constant to the redzone.
+ // The left-most redzone has enough space for at least 4 pointers.
+ // Write the Magic value to redzone[0].
Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
BasePlus0);
- Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
- ConstantInt::get(IntptrTy,
- ASan.LongSize/8));
- BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+ // Write the frame description constant to redzone[1].
+ Value *BasePlus1 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), StackDescription.str());
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
IntptrTy);
IRB.CreateStore(Description, BasePlus1);
+ // Write the PC to redzone[2].
+ Value *BasePlus2 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
+ 2 * ASan.LongSize/8)),
+ IntptrPtrTy);
+ IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
// Poison the stack redzones at the entry.
Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index a79873cbf6..2edd151869 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -29,8 +29,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,35 +41,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true),
- UseExtraChecksum(false), NoRedZone(false),
- NoFunctionNamesInData(false) {
- memcpy(Version, DefaultGCovVersion, 4);
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
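+ // The gcno/gcda version field is written byte-reversed on disk (just
+ // like the "oncg" magic), so precompute a reversed copy once.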
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, const char (&Version)[4],
- bool UseExtraChecksum, bool NoRedZone,
- bool NoFunctionNamesInData)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- UseExtraChecksum(UseExtraChecksum), NoRedZone(NoRedZone),
- NoFunctionNamesInData(NoFunctionNamesInData) {
- memcpy(this->Version, Version, 4);
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -78,6 +102,8 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteWriteoutFunctionListFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -88,23 +114,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- char Version[4];
- bool UseExtraChecksum;
- bool NoRedZone;
- bool NoFunctionNamesInData;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -115,13 +140,14 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- const char (&Version)[4],
- bool UseExtraChecksum,
- bool NoRedZone,
- bool NoFunctionNamesInData) {
- return new GCOVProfiler(EmitNotes, EmitData, Version, UseExtraChecksum,
- NoRedZone, NoFunctionNamesInData);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
+}
+
+static std::string getFunctionName(DISubprogram SP) {
+ if (!SP.getLinkageName().empty())
+ return SP.getLinkageName();
+ return SP.getName();
}
namespace {
@@ -260,7 +286,7 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
- bool UseExtraChecksum) {
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -272,16 +298,16 @@ namespace {
ReturnBlock = new GCOVBlock(i++, os);
writeBytes(FunctionTag, 4);
- uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
- writeGCOVString(SP.getName());
+ writeGCOVString(getFunctionName(SP));
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
}
@@ -356,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
SmallString<128> Filename = CU.getFilename();
sys::path::replace_extension(Filename, NewStem);
- return sys::path::filename(Filename.str());
+ StringRef FName = sys::path::filename(Filename);
+ SmallString<128> CurPath;
+ if (sys::fs::current_path(CurPath)) return FName;
+ sys::path::append(CurPath, FName.str());
+ return CurPath.str();
}
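The new mangleName prefixes the output filename with the current working directory, falling back to the bare filename when the working directory cannot be determined. A rough standalone equivalent, sketched with C++17 std::filesystem instead of LLVM's support library:

#include <filesystem>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  fs::path Source = "/src/foo.c"; // stands in for the CU filename
  fs::path Out =
      fs::current_path() / Source.filename().replace_extension("gcno");
  std::cout << Out << "\n"; // e.g. "<cwd>/foo.gcno"
  return 0;
}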
bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -382,7 +412,7 @@ void GCOVProfiler::emitGCNO() {
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
out.write("oncg", 4);
- out.write(Version, 4);
+ out.write(ReversedVersion, 4);
out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
@@ -392,7 +422,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, i, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -522,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout"
+ // function to be executed at exit and the "__llvm_gcov_flush" function to
+ // be executed when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+ // Initialize the environment and register the local writeout and flush
+ // functions.
+ Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
if (InsertIndCounterIncrCode)
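This hunk replaces the old atexit-based registration with a single __llvm_gcov_init constructor that hands both hooks to a runtime entry point, llvm_gcov_init. That entry point is not part of this patch (it lives in compiler-rt), so the following is only a hedged sketch of what it could look like; the list bookkeeping and helper names are assumptions, and only the two-function-pointer signature is taken from the CreateCall2 above:

#include <cstdlib>
#include <vector>

typedef void (*fn_ptr)();

static std::vector<fn_ptr> WriteoutFns; // invented bookkeeping
static std::vector<fn_ptr> FlushFns;

static void runWriteoutFns() {
  for (fn_ptr F : WriteoutFns)
    F(); // each module's __llvm_gcov_writeout dumps its .gcda files
}

extern "C" void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn) {
  static bool Registered = false;
  if (wfn)
    WriteoutFns.push_back(wfn);
  if (ffn)
    FlushFns.push_back(ffn);
  if (!Registered) {
    std::atexit(runWriteoutFns); // write counters once, at program exit
    Registered = true;
  }
}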
@@ -619,6 +679,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -637,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -646,7 +716,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
@@ -664,15 +734,15 @@ void GCOVProfiler::insertCounterWriteout(
std::string FilenameGcda = mangleName(CU, "gcda");
Builder.CreateCall2(StartFile,
Builder.CreateGlobalStringPtr(FilenameGcda),
- Builder.CreateGlobalStringPtr(Version));
+ Builder.CreateGlobalStringPtr(ReversedVersion));
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
DISubprogram SP(CountersBySP[j].second);
- Builder.CreateCall3(EmitFunction,
- Builder.getInt32(j),
- NoFunctionNamesInData ?
- Constant::getNullValue(Builder.getInt8PtrTy()) :
- Builder.CreateGlobalStringPtr(SP.getName()),
- Builder.getInt8(UseExtraChecksum));
+ Builder.CreateCall3(
+ EmitFunction, Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
@@ -684,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout(
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Builder.getInt32Ty(),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -715,7 +765,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
Fn->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
Fn->addFnAttr(Attribute::NoRedZone);
// Create basic blocks for function.
@@ -760,18 +810,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
FlushF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -796,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fce6513a97..4e75904ded 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
cl::desc("poison uninitialized stack variables with the given patter"),
cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
@@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Clean shadow (all zeroes) means all bits of the value are defined
/// (initialized).
- Value *getCleanShadow(Value *V) {
+ Constant *getCleanShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
return 0;
@@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ConstantStruct::get(ST, Vals);
}
+ /// \brief Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return getPoisonedShadow(ShadowTy);
+ }
+
/// \brief Create a clean (zero) origin.
Value *getCleanOrigin() {
return Constant::getNullValue(MS.OriginTy);
@@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return Shadow;
}
if (UndefValue *U = dyn_cast<UndefValue>(V)) {
- Value *AllOnes = getPoisonedShadow(getShadowTy(V));
+ Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
(void)U;
return AllOnes;
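The new -msan-poison-undef flag decides whether an undef temporary gets a poisoned (all-ones) or clean shadow. For illustration only, here is the kind of source that produces such a temporary; with the flag at its default, MemorySanitizer would report the branch as depending on uninitialized data:

#include <cstdio>

int main() {
  int x;     // never initialized: its value is undef, shadow is poisoned
  if (x % 2) // branch on uninitialized data; MSan would flag this
    std::puts("odd");
  return 0;
}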
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index f93c5ab4c8..299060a42f 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit(
static cl::opt<bool> ClInstrumentAtomics(
"tsan-instrument-atomics", cl::init(true),
cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+ "tsan-instrument-memintrinsics", cl::init(true),
+ cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite,
"Number of reads ignored due to following writes");
STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
STATISTIC(NumOmittedReadsFromConstantGlobals,
"Number of reads from constant globals");
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
@@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass {
void initializeCallbacks(Module &M);
bool instrumentLoadOrStore(Instruction *I);
bool instrumentAtomic(Instruction *I);
+ bool instrumentMemIntrinsic(Instruction *I);
void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
SmallVectorImpl<Instruction*> &All);
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr);
DataLayout *TD;
+ Type *IntptrTy;
SmallString<64> BlacklistFile;
OwningPtr<BlackList> BL;
IntegerType *OrdTy;
@@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanAtomicThreadFence;
Function *TsanAtomicSignalFence;
Function *TsanVptrUpdate;
+ Function *TsanVptrLoad;
+ Function *MemmoveFn, *MemcpyFn, *MemsetFn;
};
} // namespace
@@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), NULL));
+ TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+
+ MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL));
+ MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL));
+ MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL));
}
bool ThreadSanitizer::doInitialization(Module &M) {
@@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
+ IntptrTy = IRB.getIntPtrTy(TD);
Value *TsanInit = M.getOrInsertFunction("__tsan_init",
IRB.getVoidTy(), NULL);
appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
@@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
+ SmallVector<Instruction*, 8> MemIntrinCalls;
bool Res = false;
bool HasCalls = false;
@@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
else if (isa<ReturnInst>(BI))
RetVec.push_back(BI);
else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ if (isa<MemIntrinsic>(BI))
+ MemIntrinCalls.push_back(BI);
HasCalls = true;
chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
}
@@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Res |= instrumentAtomic(AtomicAccesses[i]);
}
+ if (ClInstrumentMemIntrinsics)
+ for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
+ Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+ }
+
// Instrument function entry/exit points if there were instrumented accesses.
if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
NumInstrumentedVtableWrites++;
return true;
}
+ if (!IsWrite && isVtableAccess(I)) {
+ IRB.CreateCall(TsanVptrLoad,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableReads++;
+ return true;
+ }
Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
if (IsWrite) NumInstrumentedWrites++;
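The new __tsan_vptr_read hook mirrors the existing __tsan_vptr_update one: a virtual call reads the vptr, so it can race with a concurrent re-construction of the same object. A deliberately racy sketch of the pattern this targets (illustrative only; the race is undefined behavior, which is exactly what the instrumentation is meant to catch):

#include <new>
#include <thread>

struct Base {
  virtual void f() {}
  virtual ~Base() {}
};
struct Derived : Base {
  void f() override {}
};

int main() {
  alignas(Derived) unsigned char Buf[sizeof(Derived)];
  Base *Obj = new (Buf) Base();
  std::thread T1([&] { Obj->f(); });            // vptr read
  std::thread T2([&] { new (Buf) Derived(); }); // vptr write
  T1.join();
  T2.join();
  return 0;
}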
@@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
return IRB->getInt32(v);
}
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated);
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ IRB.CreateCall3(MemsetFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+ IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ }
+ return false;
+}
+
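To make the replacement concrete: after instrumentMemIntrinsic runs, a memset intrinsic in the IR is an ordinary call to memset, which a runtime can intercept like any libc function. A hedged sketch of such an interceptor follows; the real interceptors live in the tsan runtime and use its own entry points, so the hook below is a stub invented for illustration:

#include <cstddef>
#include <cstdio>
#include <cstring>

// Stand-in for the detector's range-access hook (an assumption, not the
// actual tsan runtime API).
extern "C" void report_write_range(void *addr, size_t size) {
  std::printf("write of %zu bytes at %p\n", size, addr);
}

extern "C" void *intercepted_memset(void *dst, int c, size_t n) {
  report_write_range(dst, n); // record the written range first
  return std::memset(dst, c, n);
}

int main() {
  char Buf[16];
  intercepted_memset(Buf, 0, sizeof(Buf));
  return 0;
}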
// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
// standards. For background see C++11 standard. A slightly older, publicly
// available draft of the standard (not entirely up-to-date, but close enough
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 5aada9c373..8f917aeb37 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
switch (Class) {
case IC_Autorelease:
case IC_AutoreleaseRV:
+ case IC_IntrinsicUser:
case IC_User:
// These operations never directly modify a reference count.
return false;
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index e062b66555..39670f339e 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) {
M.getNamedValue("objc_copyWeak") ||
M.getNamedValue("objc_retainedObject") ||
M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer");
+ M.getNamedValue("objc_unretainedPointer") ||
+ M.getNamedValue("clang.arc.use");
}
/// \enum InstructionClass
@@ -89,6 +90,7 @@ enum InstructionClass {
IC_CopyWeak, ///< objc_copyWeak (derived)
IC_DestroyWeak, ///< objc_destroyWeak (derived)
IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_IntrinsicUser, ///< clang.arc.use
IC_CallOrUser, ///< could call objc_release and/or "use" pointers
IC_Call, ///< could call objc_release
IC_User, ///< could "use" a pointer
@@ -97,6 +99,13 @@ enum InstructionClass {
raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
+/// \brief Test if the given class is a kind of user.
+inline static bool IsUser(InstructionClass Class) {
+ return Class == IC_User ||
+ Class == IC_CallOrUser ||
+ Class == IC_IntrinsicUser;
+}
+
/// \brief Test if the given class is objc_retain or equivalent.
static inline bool IsRetain(InstructionClass Class) {
return Class == IC_Retain ||
@@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) {
/// \brief Test if the given class represents instructions which return their
/// argument verbatim.
static inline bool IsForwarding(InstructionClass Class) {
- // objc_retainBlock technically doesn't always return its argument
- // verbatim, but it doesn't matter for our purposes here.
return Class == IC_Retain ||
Class == IC_RetainRV ||
Class == IC_Autorelease ||
Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock ||
Class == IC_NoopCast;
}
@@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) {
return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
}
-static inline bool isNullOrUndef(const Value *V) {
+static inline bool IsNullOrUndef(const Value *V) {
return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
}
-static inline bool isNoopInstruction(const Instruction *I) {
+static inline bool IsNoopInstruction(const Instruction *I) {
return isa<BitCastInst>(I) ||
(isa<GetElementPtrInst>(I) &&
cast<GetElementPtrInst>(I)->hasAllZeroIndices());
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 1c13d1cbea..b96c64fe81 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -410,7 +410,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
break;
}
--BBI;
- } while (isNoopInstruction(BBI));
+ } while (IsNoopInstruction(BBI));
if (&*BBI == GetObjCArg(Inst)) {
DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
@@ -429,7 +429,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
case IC_InitWeak: {
// objc_initWeak(p, null) => *p = null
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(1))) {
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
Value *Null =
ConstantPointerNull::get(cast<PointerType>(CI->getType()));
Changed = true;
@@ -453,6 +453,10 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (isa<AllocaInst>(Inst))
TailOkForStoreStrongs = false;
continue;
+ case IC_IntrinsicUser:
+ // Remove calls to @clang.arc.use(...).
+ Inst->eraseFromParent();
+ continue;
default:
continue;
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9c14949877..924fb0a9da 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -211,6 +212,9 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
// These special functions make copies of their pointer arguments.
return true;
}
+ case IC_IntrinsicUser:
+ // Use by the clang.arc.use intrinsic is not an escape.
+ continue;
case IC_User:
case IC_None:
// Use by an instruction which copies the value is an escape if the
@@ -387,10 +391,6 @@ namespace {
/// KnownSafe is true when either of these conditions is satisfied.
bool KnownSafe;
- /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
- /// calls).
- bool IsRetainBlock;
-
/// True if the objc_release calls are all marked with the "tail" keyword.
bool IsTailCallRelease;
@@ -407,9 +407,7 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false),
- ReleaseMetadata(0) {}
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
void clear();
};
@@ -417,7 +415,6 @@ namespace {
void RRInfo::clear() {
KnownSafe = false;
- IsRetainBlock = false;
IsTailCallRelease = false;
ReleaseMetadata = 0;
Calls.clear();
@@ -451,11 +448,11 @@ namespace {
KnownPositiveRefCount = true;
}
- void ClearRefCount() {
+ void ClearKnownPositiveRefCount() {
KnownPositiveRefCount = false;
}
- bool IsKnownIncremented() const {
+ bool HasKnownPositiveRefCount() const {
return KnownPositiveRefCount;
}
@@ -486,10 +483,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
Seq = MergeSeqs(Seq, Other.Seq, TopDown);
KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
- // We can't merge a plain objc_retain with an objc_retainBlock.
- if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
- Seq = S_None;
-
// If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
Partial = false;
@@ -698,6 +691,228 @@ void BBState::MergeSucc(const BBState &Other) {
MI->second.Merge(PtrState(), /*TopDown=*/false);
}
+// Only enable ARC Annotations if we are building a debug version of
+// libObjCARCOpts.
+#ifndef NDEBUG
+#define ARC_ANNOTATIONS
+#endif
+
+// Define some macros along the lines of DEBUG and some helper functions to make
+// it cleaner to create annotations in the source code and to no-op when not
+// building in debug mode.
+#ifdef ARC_ANNOTATIONS
+
+#include "llvm/Support/CommandLine.h"
+
+/// Enable/disable ARC sequence annotations.
+static cl::opt<bool>
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+
+/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
+/// instruction so that we can track backwards when post processing via the llvm
+/// arc annotation processor tool. If the function is an
+static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
+ Value *Ptr) {
+ MDString *Hash = 0;
+
+ // If the pointer is the result of an instruction and does not yet have a
+ // source MDNode attached to it, attach a new MDNode. If it already has a
+ // source MDNode attached, return a reference to that node. Otherwise just
+ // return 0.
+ if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
+ MDNode *Node;
+ if (!(Node = Inst->getMetadata(NodeId))) {
+ // We do not have any node. Generate and attach the hash MDString to the
+ // instruction.
+
+ // We just use an MDString to ensure that this metadata gets written out
+ // of line at the module level and to provide a very simple format
+ // encoding the information herein. Both of these make it simpler for a
+ // simple external program to parse the annotations.
+ std::string Str;
+ raw_string_ostream os(Str);
+ os << "(" << Inst->getParent()->getParent()->getName() << ",%"
+ << Inst->getName() << ")";
+
+ Hash = MDString::get(Inst->getContext(), os.str());
+ Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
+ } else {
+ // We have a node. Grab its hash and return it.
+ assert(Node->getNumOperands() == 1 &&
+ "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
+ Hash = cast<MDString>(Node->getOperand(0));
+ }
+ } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
+ << ")";
+ Hash = MDString::get(Arg->getContext(), os.str());
+ }
+
+ return Hash;
+}
+
+static std::string SequenceToString(Sequence A) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << A;
+ return os.str();
+}
+
+/// Helper function to change a Sequence into an MDString, using our
+/// raw_ostream overload so that printing code lives in one location.
+static MDString *SequenceToMDString(LLVMContext &Context,
+ Sequence A) {
+ return MDString::get(Context, SequenceToString(A));
+}
+
+/// A simple function to generate a MDNode which describes the change in state
+/// for Value *Ptr caused by Instruction *Inst.
+static void AppendMDNodeToInstForPtr(unsigned NodeId,
+ Instruction *Inst,
+ Value *Ptr,
+ MDString *PtrSourceMDNodeID,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ MDNode *Node = 0;
+ Value *tmp[3] = {PtrSourceMDNodeID,
+ SequenceToMDString(Inst->getContext(),
+ OldSeq),
+ SequenceToMDString(Inst->getContext(),
+ NewSeq)};
+ Node = MDNode::get(Inst->getContext(),
+ ArrayRef<Value*>(tmp, 3));
+
+ Inst->setMetadata(NodeId, Node);
+}
+
+/// Add annotation calls to the beginning of the basic block that show the
+/// state of a pointer at the entrance to the block.
+static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Add annotation calls to the end of the basic block that show the state of
+/// the pointer at the bottom of the block.
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Adds a source annotation to pointer and a state change annotation to Inst
+/// referencing the source annotation and the old/new state of pointer.
+static void GenerateARCAnnotation(unsigned InstMDId,
+ unsigned PtrMDId,
+ Instruction *Inst,
+ Value *Ptr,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ if (EnableARCAnnotations) {
+ // First generate the source annotation on our pointer. This will return
+ // an MDString* when Ptr comes from an instruction or an argument, in
+ // which case we can emit a source annotation. AppendMDNodeToSourcePtr
+ // returns 0 (i.e. NULL) for any other kind of value, in which case no
+ // source annotation is emitted.
+ //
+ // The point of this is to make it easy for the
+ // llvm-arc-annotation-processor tool to cross-reference where the source
+ // pointer is in the LLVM IR, since debug info does not normally carry
+ // such information about LLVM IR values for backends to use (outside of
+ // non-standard cases like this one).
+ MDString *SourcePtrMDNode =
+ AppendMDNodeToSourcePtr(PtrMDId, Ptr);
+ AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
+ NewSeq);
+ }
+}
+
+// The actual interface for accessing the above functionality is defined via
+// some simple macros which are defined below. We do this so that the user does
+// not need to pass in what metadata id is needed resulting in cleaner code and
+// additionally since it provides an easy way to conditionally no-op all
+// annotation support in a non-debug build.
+
+/// Use this macro to annotate a sequence state change when processing
+/// instructions bottom up.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+/// Use this macro to annotate a sequence state change when processing
+/// instructions top down.
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+
+#else // !ARC_ANNOTATION
+// If annotations are off, the macros are no-ops.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
+#endif // !ARC_ANNOTATION
+
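The ANNOTATE_* macros above follow the usual debug-only macro pattern: a real call in asserts builds, nothing at all otherwise, so release builds pay no cost at the call sites. The same pattern reduced to a standalone sketch (names invented for illustration):

#include <cstdio>

#ifndef NDEBUG
#define TRACE_SEQ(ptr, old, neu) \
  std::printf("%s: %s -> %s\n", (ptr), (old), (neu))
#else
#define TRACE_SEQ(ptr, old, neu) \
  do { } while (0) // compiles away in release builds
#endif

int main() {
  TRACE_SEQ("%x", "S_Use", "S_CanRelease");
  return 0;
}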
namespace {
/// \brief The main ARC optimization pass.
class ObjCARCOpt : public FunctionPass {
@@ -738,6 +953,15 @@ namespace {
/// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
unsigned NoObjCARCExceptionsMDKind;
+#ifdef ARC_ANNOTATIONS
+ /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
+ unsigned ARCAnnotationBottomUpMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
+ unsigned ARCAnnotationTopDownMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
+ unsigned ARCAnnotationProvenanceSourceMDKind;
+#endif // ARC_ANNOTATIONS
+
Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
@@ -751,6 +975,8 @@ namespace {
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
+ bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
+ InstructionClass &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
@@ -958,7 +1184,7 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
// Check that the call is next to the retain.
BasicBlock::const_iterator I = Call;
++I;
- while (isNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(I)) ++I;
if (&*I != Retain)
return;
@@ -990,14 +1216,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
if (Call->getParent() == RetainRV->getParent()) {
BasicBlock::const_iterator I = Call;
++I;
- while (isNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock *RetainRVParent = RetainRV->getParent();
if (II->getNormalDest() == RetainRVParent) {
BasicBlock::const_iterator I = RetainRVParent->begin();
- while (isNoopInstruction(I)) ++I;
+ while (IsNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
}
@@ -1008,7 +1234,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
// pointer. In this case, we can delete the pair.
BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
if (I != Begin) {
- do --I; while (I != Begin && isNoopInstruction(I));
+ do --I; while (I != Begin && IsNoopInstruction(I));
if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
GetObjCArg(I) == Arg) {
Changed = true;
@@ -1084,6 +1310,35 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
}
+// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
+// calls.
+//
+// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
+// does not escape (following the rules of block escaping), strength reduce the
+// objc_retainBlock to an objc_retain.
+//
+// TODO: If an objc_retainBlock call is dominated by a previous
+// objc_retainBlock call, strength reduce the objc_retainBlock to an
+// objc_retain.
+bool
+ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
+ InstructionClass &Class) {
+ assert(GetBasicInstructionClass(Inst) == Class);
+ assert(IC_RetainBlock == Class);
+
+ // If we can not optimize Inst, return false.
+ if (!IsRetainBlockOptimizable(Inst))
+ return false;
+
+ CallInst *RetainBlock = cast<CallInst>(Inst);
+ RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
+ // Remove copy_on_escape metadata.
+ RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
+ Class = IC_Retain;
+
+ return true;
+}
+
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
@@ -1125,7 +1380,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_InitWeak:
case IC_DestroyWeak: {
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0))) {
+ if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
@@ -1146,8 +1401,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_CopyWeak:
case IC_MoveWeak: {
CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0)) ||
- isNullOrUndef(CI->getArgOperand(1))) {
+ if (IsNullOrUndef(CI->getArgOperand(0)) ||
+ IsNullOrUndef(CI->getArgOperand(1))) {
Changed = true;
Type *Ty = CI->getArgOperand(0)->getType();
new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
@@ -1167,6 +1422,12 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
+ case IC_RetainBlock:
+ // If we strength reduce an objc_retainBlock to an objc_retain, continue
+ // on to the objc_retain peephole optimizations. Otherwise break.
+ if (!OptimizeRetainBlockCall(F, Inst, Class))
+ break;
+ // FALLTHROUGH
case IC_Retain:
OptimizeRetainCall(F, Inst);
break;
@@ -1245,7 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
const Value *Arg = GetObjCArg(Inst);
// ARC calls with null are no-ops. Delete them.
- if (isNullOrUndef(Arg)) {
+ if (IsNullOrUndef(Arg)) {
Changed = true;
++NumNoops;
DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
@@ -1280,7 +1541,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (isNullOrUndef(Incoming))
+ if (IsNullOrUndef(Incoming))
HasNull = true;
else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
.getNumSuccessors() != 1) {
@@ -1334,7 +1595,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming =
StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (!isNullOrUndef(Incoming)) {
+ if (!IsNullOrUndef(Incoming)) {
CallInst *Clone = cast<CallInst>(CInst->clone());
Value *Op = PN->getIncomingValue(i);
Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
@@ -1502,21 +1763,21 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
}
MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
+ S.ResetSequenceProgress(NewSeq);
S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
-
S.SetKnownPositiveRefCount();
break;
}
case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
case IC_Retain:
case IC_RetainRV: {
Arg = GetObjCArg(Inst);
@@ -1524,7 +1785,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
PtrState &S = MyStates.getPtrBottomUpState(Arg);
S.SetKnownPositiveRefCount();
- switch (S.GetSeq()) {
+ Sequence OldSeq = S.GetSeq();
+ switch (OldSeq) {
case S_Stop:
case S_Release:
case S_MovableRelease:
@@ -1534,10 +1796,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_CanRelease:
// Don't do retain+release tracking for IC_RetainRV, because it's
// better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ if (Class != IC_RetainRV)
Retains[Inst] = S.RRI;
- }
S.ClearSequenceProgress();
break;
case S_None:
@@ -1545,6 +1805,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Retain:
llvm_unreachable("bottom-up pointer in retain state!");
}
+ ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
return NestingDetected;
}
case IC_AutoreleasepoolPop:
@@ -1571,10 +1832,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
+ S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Use:
S.SetSeq(S_CanRelease);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
continue;
case S_CanRelease:
case S_Release:
@@ -1601,10 +1863,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
else
S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
assert(S.RRI.ReverseInsertPts.empty());
// As above; handle invoke specially.
if (isa<InvokeInst>(Inst))
@@ -1614,8 +1877,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
}
break;
case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
+ if (CanUse(Inst, Ptr, PA, Class)) {
S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ }
break;
case S_CanRelease:
case S_Use:
@@ -1654,6 +1919,21 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
}
+#ifdef ARC_ANNOTATIONS
+ if (EnableARCAnnotations) {
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(),
+ E = MyStates.bottom_up_ptr_end(); I != E; ++I) {
+ Value *Ptr = const_cast<Value*>(I->first);
+ Sequence Seq = I->second.GetSeq();
+ GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.bottomup.bbend",
+ BB, Ptr, Seq);
+ }
+ }
+#endif
+
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
Instruction *Inst = llvm::prior(I);
@@ -1677,6 +1957,20 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
+#ifdef ARC_ANNOTATIONS
+ if (EnableARCAnnotations) {
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(),
+ E = MyStates.bottom_up_ptr_end(); I != E; ++I) {
+ Value *Ptr = const_cast<Value*>(I->first);
+ Sequence Seq = I->second.GetSeq();
+ GenerateARCBBEntranceAnnotation("llvm.arc.annotation.bottomup.bbstart",
+ BB, Ptr, Seq);
+ }
+ }
+#endif
+
return NestingDetected;
}
@@ -1690,11 +1984,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (Class) {
case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
case IC_Retain:
case IC_RetainRV: {
Arg = GetObjCArg(Inst);
@@ -1714,9 +2007,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
if (S.GetSeq() == S_Retain)
NestingDetected = true;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
S.ResetSequenceProgress(S_Retain);
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
S.RRI.Calls.insert(Inst);
}
@@ -1730,7 +2023,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
Arg = GetObjCArg(Inst);
PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearRefCount();
+ S.ClearKnownPositiveRefCount();
switch (S.GetSeq()) {
case S_Retain:
@@ -1741,6 +2034,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
Releases[Inst] = S.RRI;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
break;
case S_None:
@@ -1776,10 +2070,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
+ S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Retain:
S.SetSeq(S_CanRelease);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
assert(S.RRI.ReverseInsertPts.empty());
S.RRI.ReverseInsertPts.insert(Inst);
@@ -1801,8 +2096,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible direct uses.
switch (Seq) {
case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
+ if (CanUse(Inst, Ptr, PA, Class)) {
S.SetSeq(S_Use);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
+ }
break;
case S_Retain:
case S_Use:
@@ -1843,6 +2140,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
}
}
+#ifdef ARC_ANNOTATIONS
+ if (EnableARCAnnotations) {
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I) {
+ Value *Ptr = const_cast<Value*>(I->first);
+ Sequence Seq = I->second.GetSeq();
+ GenerateARCBBEntranceAnnotation("llvm.arc.annotation.topdown.bbstart",
+ BB, Ptr, Seq);
+ }
+ }
+#endif
+
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
@@ -1852,6 +2163,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
+#ifdef ARC_ANNOTATIONS
+ if (EnableARCAnnotations) {
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I) {
+ Value *Ptr = const_cast<Value*>(I->first);
+ Sequence Seq = I->second.GetSeq();
+ GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.topdown.bbend",
+ BB, Ptr, Seq);
+ }
+ }
+#endif
+
CheckForCFGHazards(BB, BBStates, MyStates);
return NestingDetected;
}
@@ -1991,15 +2316,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
CallInst *Call =
- CallInst::Create(RetainsToMove.IsRetainBlock ?
- getRetainBlockCallee(M) : getRetainCallee(M),
- MyArg, "", InsertPt);
+ CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
Call->setDoesNotThrow();
- if (RetainsToMove.IsRetainBlock)
- Call->setMetadata(CopyOnEscapeMDKind,
- MDNode::get(M->getContext(), ArrayRef<Value *>()));
- else
- Call->setTailCall();
+ Call->setTailCall();
DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
<< "\n"
@@ -2075,7 +2394,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
unsigned OldCount = 0;
unsigned NewCount = 0;
bool FirstRelease = true;
- bool FirstRetain = true;
for (;;) {
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
@@ -2156,16 +2474,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
OldDelta += PathCount;
OldCount += PathCount;
- // Merge the IsRetainBlock values.
- if (FirstRetain) {
- RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
- FirstRetain = false;
- } else if (ReleasesToMove.IsRetainBlock !=
- NewReleaseRetainRRI.IsRetainBlock)
- // It's not possible to merge the sequences if one uses
- // objc_retain and the other uses objc_retainBlock.
- return false;
-
// Collect the optimal insertion points.
if (!KnownSafe)
for (SmallPtrSet<Instruction *, 2>::const_iterator
@@ -2271,6 +2579,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
ReleasesToMove, Arg, KnownSafe,
AnyPairsCompletelyEliminated);
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested. If we were to move
+ // calls in this case, we would not be able to match the annotations up
+ // with the instructions they describe.
+ PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
+#endif // ARC_ANNOTATIONS
+
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
// code!
@@ -2392,6 +2706,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
goto clobbered;
case IC_AutoreleasepoolPush:
case IC_None:
+ case IC_IntrinsicUser:
case IC_User:
// Weak pointers are only modified through the weak entry points
// (and arbitrary calls, which could call the weak entry points).
@@ -2617,6 +2932,14 @@ bool ObjCARCOpt::doInitialization(Module &M) {
M.getContext().getMDKindID("clang.arc.copy_on_escape");
NoObjCARCExceptionsMDKind =
M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+#ifdef ARC_ANNOTATIONS
+ ARCAnnotationBottomUpMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
+ ARCAnnotationTopDownMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.topdown");
+ ARCAnnotationProvenanceSourceMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
+#endif // ARC_ANNOTATIONS
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
index a841c64a9f..03e12d4fd7 100644
--- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
return OS << "IC_Call";
case IC_User:
return OS << "IC_User";
+ case IC_IntrinsicUser:
+ return OS << "IC_IntrinsicUser";
case IC_None:
return OS << "IC_None";
}
@@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- // No arguments.
+ // No (mandatory) arguments.
if (AI == AE)
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Case("clang.arc.use", IC_IntrinsicUser)
.Default(IC_CallOrUser);
// One argument.
@@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
return StringSwitch<InstructionClass>(F->getName())
.Case("objc_moveWeak", IC_MoveWeak)
.Case("objc_copyWeak", IC_CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
+ .Case("llvm.arc.annotation.topdown.bbend", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
.Default(IC_CallOrUser);
}
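GetFunctionClass resolves callee names with llvm::StringSwitch, which the new .Case lines extend. The same idiom in a reduced, self-contained form (assuming only LLVM's ADT headers):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum Kind { KindAnnotation, KindOther };

// Classify a callee name the same way the .Case chains above do.
static Kind classifyCallee(llvm::StringRef Name) {
  return llvm::StringSwitch<Kind>(Name)
      .Case("llvm.arc.annotation.topdown.bbstart", KindAnnotation)
      .Case("llvm.arc.annotation.bottomup.bbstart", KindAnnotation)
      .Default(KindOther);
}

int main() {
  return classifyCallee("llvm.arc.annotation.topdown.bbstart") == KindAnnotation
             ? 0
             : 1;
}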
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447f1c..129af8d45d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d646..5d02c68a7a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -64,10 +65,16 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +85,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must-keep.
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variables marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariables that must not be merged away.
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -87,6 +111,7 @@ namespace {
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
const char *getPassName() const {
return "Merge internal globals";
@@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from the llvm.used array.
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction.
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
@@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
return new GlobalMerge(tli);
}
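
The clause walk added above matters because typeinfo globals named in landingpad catch clauses are reached through the EH tables rather than through ordinary IR use lists, so merging them would break exception dispatch. A minimal standalone sketch of the same traversal, assuming the LLVM 3.3-era C++ API used in this patch (the helper name is invented):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Record every global referenced by a landing pad clause of F, so that a
// merging pass can treat it as unmergeable.
static void keepLandingPadGlobals(
    Function &F, SmallPtrSet<const GlobalVariable *, 16> &Keep) {
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    // Only invoke terminators lead to a landing pad.
    const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
    if (!II)
      continue;
    const LandingPadInst *LP = II->getUnwindDest()->getLandingPadInst();
    for (unsigned i = 0, n = LP->getNumClauses(); i != n; ++i)
      if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
              LP->getClause(i)->stripPointerCasts()))
        Keep.insert(GV);
  }
}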
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 97fff7e782..8e76c78f5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
+ // Computing the value outside of the loop brings no benefit if:
+ // - it is definitely used inside the loop in a way which cannot be
+ // optimized away.
+ // - no use outside of the loop can take advantage of hoisting the
+ // computation out of the loop.
+ if (ExitValue->getSCEVType() >= scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB = Inst->use_begin(), IE = Inst->use_end();
+ IB != IE && NumUses <= 6; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB = UseInstr->use_begin(),
+ PE = UseInstr->use_end();
+ PB != PE && NumUses <= 6; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
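
For intuition, a source-level sketch of the case the new heuristic declines to rewrite (invented example): the exit value of `sum` is a higher-order SCEV expression, its only in-loop consumer is a call (a hard internal use), and the sole out-of-loop use is the return, so expanding a closed form after the loop would remove no in-loop work.

// The loop must keep computing sum every iteration for the call, and the
// return gains nothing from a hoisted closed-form expression, so
// RewriteLoopExitValues now leaves this alone.
int consume(int x) { return x; }
int example(int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    sum += i * i;   // exit value is a non-trivial polynomial in n
    consume(sum);   // hard internal use: forces per-iteration computation
  }
  return sum;       // only external user is the return instruction
}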
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9c67e327e2..0b62050b17 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -34,13 +34,9 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop* L, LPPassManager& LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
};
}
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// isLoopDead - Determines if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
- if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock* preheader = L->getLoopPreheader();
+ BasicBlock *preheader = L->getLoopPreheader();
if (!preheader)
return false;
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst* TI = preheader->getTerminator();
+ TerminatorInst *TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock *exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
int j = P->getBasicBlockIndex(exitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
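
As a source-level illustration of the safety condition above (invented examples): the first loop has a computable trip count, loop-invariant exit state, and no side effects, so loop-deletion may remove it; the second has a backedge-taken count ScalarEvolution cannot compute, and deleting it could turn a non-terminating program into a terminating one.

int deletable(int n) {
  int t = 0;
  for (int i = 0; i < n; ++i)
    t += 2;        // t is never read after the loop; the loop is dead
  return 0;
}

int kept(volatile int *flag) {
  while (*flag)    // trip count is not computable (and the volatile load
    ;              // is a side effect), so the loop must stay
  return 0;
}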
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4e4cb86464..73e44d7edf 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
@@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
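
The OptimizeMax cleanup above is the standard collapse of an isa<>/cast<> pair into a single dyn_cast<>, which tests and converts in one step and yields null on mismatch. A self-contained sketch of the idiom (helper name invented):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// True when V is the integer constant 1, written with one dyn_cast instead
// of an isa<> test followed by a cast<>.
static bool isConstantOne(const Value *V) {
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
    return CI->isOne();
  return false;
}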
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 0da3746950..1f343136e5 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -110,6 +110,51 @@ namespace {
}
};
};
+
+ /// Utility class representing a non-constant Xor-operand. We classify
+ /// non-constant Xor-Operands into two categories:
+ /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
+ /// C2)
+ /// C2.1) The operand is in the form of "X | C", where C is a non-zero
+ /// constant.
+ /// C2.2) Any operand E which doesn't fall into C1 or C2.1; we view such an
+ /// operand as "E | 0".
+ class XorOpnd {
+ public:
+ XorOpnd(Value *V);
+ const XorOpnd &operator=(const XorOpnd &That);
+
+ bool isInvalid() const { return SymbolicPart == 0; }
+ bool isOrExpr() const { return isOr; }
+ Value *getValue() const { return OrigVal; }
+ Value *getSymbolicPart() const { return SymbolicPart; }
+ unsigned getSymbolicRank() const { return SymbolicRank; }
+ const APInt &getConstPart() const { return ConstPart; }
+
+ void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+ // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
+ // The purpose is twofold:
+ // 1) Cluster together the operands sharing the same symbolic-value.
+ // 2) The operand having the smaller symbolic-value-rank is permuted earlier,
+ // which could potentially shorten the critical path and expose more
+ // loop-invariants.
+ // Note that values' ranks are basically defined in RPO order (FIXME).
+ // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
+ // than Y, which is defined earlier than Z. Permuting "X | 1", "Y & 2",
+ // "Z" in the order X-Y-Z is therefore better than any other order.
+ struct PtrSortFunctor {
+ bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
+ }
+ };
+ private:
+ Value *OrigVal;
+ Value *SymbolicPart;
+ APInt ConstPart;
+ unsigned SymbolicRank;
+ bool isOr;
+ };
}
namespace {
@@ -137,6 +182,11 @@ namespace {
Value *OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
+ Value *&Res);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res);
bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
SmallVectorImpl<Factor> &Factors);
Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
@@ -148,6 +198,42 @@ namespace {
};
}
+XorOpnd::XorOpnd(Value *V) {
+ assert(!isa<ConstantInt>(V) && "No ConstantInt");
+ OrigVal = V;
+ Instruction *I = dyn_cast<Instruction>(V);
+ SymbolicRank = 0;
+
+ if (I && (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And)) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (isa<ConstantInt>(V0))
+ std::swap(V0, V1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
+ ConstPart = C->getValue();
+ SymbolicPart = V0;
+ isOr = (I->getOpcode() == Instruction::Or);
+ return;
+ }
+ }
+
+ // View the operand as "V | 0".
+ SymbolicPart = V;
+ ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ isOr = true;
+}
+
+const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
+ OrigVal = That.OrigVal;
+ SymbolicPart = That.SymbolicPart;
+ ConstPart = That.ConstPart;
+ SymbolicRank = That.SymbolicRank;
+ isOr = That.isOr;
+ return *this;
+}
+
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
"Reassociate expressions", false, false)
@@ -1040,6 +1126,240 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return 0;
}
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
+/// instruction with the given two operands, and returns the resulting
+/// instruction. There are two special cases: 1) if the constant operand is 0,
+/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
+/// be returned.
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+ const APInt &ConstOpnd) {
+ if (ConstOpnd != 0) {
+ if (!ConstOpnd.isAllOnesValue()) {
+ LLVMContext &Ctx = Opnd->getType()->getContext();
+ Instruction *I;
+ I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
+ "and.ra", InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
+ }
+ return Opnd;
+ }
+ return 0;
+}
+
+// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
+// into "R ^ C", where C would be 0, and R is a symbolic value.
+//
+// If successful, true is returned, and the "R" and "C" are returned
+// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
+// and both "Res" and "ConstOpnd" remain unchanged.
+//
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ APInt &ConstOpnd, Value *&Res) {
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // = ((x | c1) ^ c1) ^ (c1 ^ c2)
+ // = (x & ~c1) ^ (c1 ^ c2)
+ // It is useful only when c1 == c2.
+ if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
+
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
+ }
+ return false;
+}
+
+
+// Helper function of OptimizeXor(). It tries to simplify
+// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
+// symbolic value.
+//
+// If successful, true is returned, and the "R" and "C" are returned
+// via "Res" and "ConstOpnd", respectively (if the entire expression
+// evaluates to a constant, Res is set to NULL); otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res) {
+ Value *X = Opnd1->getSymbolicPart();
+ if (X != Opnd2->getSymbolicPart())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+
+ // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
+ int DeadInstNum = 1;
+ if (Opnd1->getValue()->hasOneUse())
+ DeadInstNum++;
+ if (Opnd2->getValue()->hasOneUse())
+ DeadInstNum++;
+
+ // Xor-Rule 2:
+ // (x | c1) ^ (x & c2)
+ // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
+ // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
+ // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
+ //
+ if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
+ if (Opnd2->isOrExpr())
+ std::swap(Opnd1, Opnd2);
+
+ APInt C3((~C1) ^ C2);
+
+ // Do not increase code size!
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C1;
+
+ } else if (Opnd1->isOrExpr()) {
+ // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
+ //
+ APInt C3 = C1 ^ C2;
+
+ // Do not increase code size
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C3;
+ } else {
+ // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
+ //
+ APInt C3 = C1 ^ C2;
+ Res = createAndInstr(I, X, C3);
+ }
+
+ // Put the original operands in the Redo list; hope they will be deleted
+ // as dead code.
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
+ RedoInsts.insert(T);
+
+ return true;
+}
+
+/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
+/// to a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+Value *Reassociate::OptimizeXor(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
+ return V;
+
+ if (Ops.size() == 1)
+ return 0;
+
+ SmallVector<XorOpnd, 8> Opnds;
+ SmallVector<XorOpnd*, 8> OpndPtrs;
+ Type *Ty = Ops[0].Op->getType();
+ APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+
+ // Step 1: Convert ValueEntry to XorOpnd
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *V = Ops[i].Op;
+ if (!isa<ConstantInt>(V)) {
+ XorOpnd O(V);
+ O.setSymbolicRank(getRank(O.getSymbolicPart()));
+ Opnds.push_back(O);
+ OpndPtrs.push_back(&Opnds.back());
+ } else
+ ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
+
+ // Step 2: Sort the Xor-Operands in a way such that the operands containing
+ // the same symbolic value cluster together. For instance, the input operand
+ // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
+ // ("x | 123", "x & 789", "y & 456").
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+
+ // Step 3: Combine adjacent operands
+ XorOpnd *PrevOpnd = 0;
+ bool Changed = false;
+ for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd *CurrOpnd = OpndPtrs[i];
+ // The combined value
+ Value *CV;
+
+ // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
+ if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ Changed = true;
+ if (CV)
+ *CurrOpnd = XorOpnd(CV);
+ else {
+ CurrOpnd->Invalidate();
+ continue;
+ }
+ }
+
+ if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
+ PrevOpnd = CurrOpnd;
+ continue;
+ }
+
+ // Step 3.2: When previous and current operands share the same symbolic
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
+ //
+ if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+ // Remove previous operand
+ PrevOpnd->Invalidate();
+ if (CV) {
+ *CurrOpnd = XorOpnd(CV);
+ PrevOpnd = CurrOpnd;
+ } else {
+ CurrOpnd->Invalidate();
+ PrevOpnd = 0;
+ }
+ Changed = true;
+ }
+ }
+
+ // Step 4: Reassemble the Ops
+ if (Changed) {
+ Ops.clear();
+ for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd &O = Opnds[i];
+ if (O.isInvalid())
+ continue;
+ ValueEntry VE(getRank(O.getValue()), O.getValue());
+ Ops.push_back(VE);
+ }
+ if (ConstOpnd != 0) {
+ Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ ValueEntry VE(getRank(C), C);
+ Ops.push_back(VE);
+ }
+ int Sz = Ops.size();
+ if (Sz == 1)
+ return Ops.back().Op;
+ else if (Sz == 0) {
+ assert(ConstOpnd == 0);
+ return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ }
+ }
+
+ return 0;
+}
+
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
@@ -1431,11 +1751,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
default: break;
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
break;
+ case Instruction::Xor:
+ if (Value *Result = OptimizeXor(I, Ops))
+ return Result;
+ break;
+
case Instruction::Add:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
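
The four Xor rules in the comments above are plain Boolean identities and can be checked exhaustively. A standalone brute-force test over all 8-bit values with arbitrary constants c1 and c2 (invented test harness, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t c1 = 0x35, c2 = 0x59;
  const uint8_t c3 = c1 ^ c2;
  for (unsigned v = 0; v != 256; ++v) {
    const uint8_t x = (uint8_t)v;
    // Rule 1: (x | c1) ^ c1 == x & ~c1
    assert((uint8_t)((x | c1) ^ c1) == (x & (uint8_t)~c1));
    // Rule 2: (x | c1) ^ (x & c2) == (x & (~c1 ^ c2)) ^ c1
    assert((uint8_t)((x | c1) ^ (x & c2)) ==
           (uint8_t)((x & (uint8_t)(~c1 ^ c2)) ^ c1));
    // Rule 3: (x | c1) ^ (x | c2) == (x & c3) ^ c3
    assert((uint8_t)((x | c1) ^ (x | c2)) == (uint8_t)((x & c3) ^ c3));
    // Rule 4: (x & c1) ^ (x & c2) == x & c3
    assert((uint8_t)((x & c1) ^ (x & c2)) == (x & c3));
  }
  return 0;
}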
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 810a553c74..f6bb365216 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -57,11 +57,15 @@
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -69,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
+
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. It marks
+/// the range of bytes which are referenced by a particular instruction, and
+/// includes a handle to the user itself and the pointer value in use. The
+/// bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentional overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
- /// \brief A particular use of a partition of the alloca.
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
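
PartitionUse above packs the Use pointer and the split flag into a single word with PointerIntPair, which stores a small integer in the low, alignment-guaranteed-zero bits of a pointer. A minimal toy usage of the same class (invented, for illustration only):

#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

void demo(int *P) {
  // One word holds both the pointer and a 1-bit flag.
  PointerIntPair<int *, 1, bool> PtrAndSplit(P, true);
  int *Raw   = PtrAndSplit.getPointer(); // recovers the original pointer
  bool Split = PtrAndSplit.getInt();     // recovers the flag
  PtrAndSplit.setInt(false);             // updates the flag in place
  (void)Raw;
  (void)Split;
}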
@@ -456,10 +515,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +533,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting
+ // over-eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +555,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +566,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +873,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +889,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -868,7 +929,7 @@ private:
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue();
- MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
Offsets.DestBegin == Offsets.SourceBegin)
return markAsDead(II); // Skip identity transfers without side-effects.
@@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
splitAndMergePartitions();
}
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+ MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(),
+ MaxPartitionsPerAlloca);
+
// Now build up the user lists for each of these disjoint partitions by
// re-walking the recursive users of the alloca.
Uses.resize(Partitions.size());
@@ -1084,22 +1149,31 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
PtrI = UB.visitPtr(AI);
assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+ MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses,
+ MaxPartitionUsesPerAlloca);
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser()))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser()))
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
else
return 0; // Bail if we have weird uses.
@@ -1139,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1243,12 +1318,12 @@ public:
// may be zapped by an optimization pass in future.
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(SExt->getOperand(0));
if (!Arg)
- Arg = SI->getOperand(0);
+ Arg = SI->getValueOperand();
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getOperand(0);
+ Arg = LI->getPointerOperand();
} else {
continue;
}
@@ -1374,11 +1449,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1472,7 +1547,7 @@ private:
assert(!Loads.empty());
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
+ IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
@@ -1495,7 +1570,7 @@ private:
TerminatorInst *TI = Pred->getTerminator();
Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
+ IRBuilderTy PredBuilder(TI);
LoadInst *Load
= PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1522,8 +1597,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1576,10 +1651,10 @@ private:
if (!isSafeSelectToSpeculate(SI, Loads))
return;
- IRBuilder<> IRB(&SI);
+ IRBuilderTy IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1590,7 +1665,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1622,8 +1697,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1640,9 +1715,8 @@ private:
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
if (Indices.empty())
return BasePtr;
@@ -1651,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1663,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1695,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
}
/// \brief Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1728,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, Prefix);
+ Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1741,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1760,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1773,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1795,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1813,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
- Value *Ptr, APInt Offset, Type *PointerTy,
- const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1849,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
- Indices, Prefix)) {
+ Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -1884,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- Prefix + ".raw_cast");
+ "raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- Prefix + ".raw_idx");
+ "raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
return Ptr;
}
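
The "natural GEP" machinery above prefers typed indices over raw i8* arithmetic: under common struct layout, a byte offset of 4 into a pair of 32-bit ints lands on the second field, i.e. GEP indices (0, 1). A standalone sketch of that correspondence (invented types; assumes the usual layout where the second int field sits at offset sizeof(int)):

#include <cassert>
#include <cstddef>

struct Pair { int A; int B; };

int main() {
  // Byte offset sizeof(int) from a Pair* is the location the natural GEP
  // "getelementptr inbounds %Pair* %p, i32 0, i32 1" would name, instead
  // of a bitcast to i8* plus raw offset arithmetic.
  assert(offsetof(Pair, B) == sizeof(int));
  return 0;
}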
@@ -1910,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1932,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *Ty) {
assert(canConvertValue(DL, V->getType(), Ty) &&
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
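
The new integer path in canConvertValue/convertValue above treats widening as always viable and realizes it with a zext, so the low bits survive and the new high bits are zero. A trivial standalone check of the same semantics (invented illustration, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t  Narrow = 0xAB;
  uint32_t Wide   = Narrow; // unsigned widening == zext: high bits are zero
  assert(Wide == 0xABu);
  assert((uint8_t)Wide == Narrow); // the original value is recoverable
  return 0;
}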
@@ -1976,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1996,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2062,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2073,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2088,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2104,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2124,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
@@ -2147,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2178,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
-static Value *extractVector(IRBuilder<> &IRB, Value *V,
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
unsigned BeginIndex, unsigned EndIndex,
const Twine &Name) {
VectorType *VecTy = cast<VectorType>(V->getType());
@@ -2206,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
@@ -2296,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // The name prefix to use when rewriting instructions for this alloca.
- std::string NamePrefix;
+ // Utility IR builder whose name prefix is set up for each visited use and
+ // whose insertion point is set to the user being rewritten.
+ IRBuilderTy IRB;
public:
AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2313,7 +2395,8 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2335,14 +2418,21 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
- NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2364,14 +2454,10 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Twine getName(const Twine &Suffix) {
- return NamePrefix + Suffix;
- }
-
- Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2421,27 +2507,27 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
+ Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || EndOffset < NewAllocaEndOffset)
V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
+ "extract");
return V;
}
@@ -2451,56 +2537,37 @@ private:
assert(OldOp == OldPtr);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation, its behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
-
- IRBuilder<> IRB(&LI);
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(IRB, LI);
+ V = rewriteIntegerLoad(LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
canConvertValue(TD, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2510,7 +2577,7 @@ private:
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
- getName(".insert"));
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
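
The placeholder dance deserves a step-by-step reading, since the obvious approach would miscompile:

    // The subtlety (SplitV is the just-loaded narrow value):
    //   Value *V = insertInteger(TD, IRB, Placeholder, SplitV, BeginOffset, ...);
    //   LI.replaceAllUsesWith(V);             // LI's users now consume V...
    //   Placeholder->replaceAllUsesWith(&LI); // ...and V's wide input becomes LI
    // Building V directly out of &LI would not work: replaceAllUsesWith
    // would rewrite V's own operand as well, leaving V defined in terms of
    // itself. The throwaway load stands in for LI until the swap is done.
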
@@ -2524,7 +2591,7 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ bool rewriteVectorizedStoreInst(Value *V,
StoreInst &SI, Value *OldOp) {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
@@ -2539,8 +2606,8 @@ private:
// Mix in the existing elements.
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2550,17 +2617,17 @@ private:
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
- getName(".insert"));
+ "insert");
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2574,7 +2641,6 @@ private:
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&SI);
Value *V = SI.getValueOperand();
@@ -2587,23 +2653,21 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
- getName(".extract"));
+ "extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(IRB, V, SI);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
@@ -2634,7 +2698,7 @@ private:
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
- Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+ Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
@@ -2642,26 +2706,25 @@ private:
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
ConstantExpr::getUDiv(
Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
- getName(".isplat"));
+ "isplat");
return V;
}
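
The multiply in getIntegerSplat is the standard byte-broadcast identity: allOnes(Size*8) / allOnes(8) constant-folds to 0x0101...01, and multiplying the zero-extended byte by it replicates the byte into every position. Worked through for Size == 4, where SplatIntTy is i32:

    // 0xFFFFFFFF / 0xFF == 0x01010101, and e.g. 0xAB * 0x01010101 == 0xABABABAB.
    #include <cstdint>
    static uint32_t splat4(uint8_t B) { return uint32_t(B) * 0x01010101u; }
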
/// \brief Compute a vector splat for a given element value.
- Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
- V = IRB.CreateVectorSplat(NumElements, V, NamePrefix);
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawDest() == OldPtr);
// If the memset has a variable size, it cannot be split; just adjust the
@@ -2718,31 +2781,31 @@ private:
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- Value *Splat = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ElementTy)/8);
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
Splat = convertValue(TD, IRB, Splat, ElementTy);
if (NumElements > 1)
- Splat = getVectorSplat(IRB, Splat, NumElements);
+ Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
// set integer.
assert(!II.isVolatile());
uint64_t Size = EndOffset - BeginOffset;
- V = getIntegerSplat(IRB, II.getValue(), Size);
+ V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaBeginOffset)) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
@@ -2753,10 +2816,9 @@ private:
assert(BeginOffset == NewAllocaBeginOffset);
assert(EndOffset == NewAllocaEndOffset);
- V = getIntegerSplat(IRB, II.getValue(),
- TD.getTypeSizeInBits(ScalarTy)/8);
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
- V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
V = convertValue(TD, IRB, V, AllocaTy);
}
@@ -2773,7 +2835,6 @@ private:
// them into two categories: split intrinsics and unsplit intrinsics.
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
@@ -2857,8 +2918,7 @@ private:
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
@@ -2901,8 +2961,7 @@ private:
OtherPtrTy = SubIntTy->getPointerTo();
}
- Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
@@ -2910,31 +2969,31 @@ private:
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
Src = convertValue(TD, IRB, Src, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
- getName(".copyload"));
+ "copyload");
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
@@ -2949,7 +3008,6 @@ private:
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
@@ -2977,7 +3035,9 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
@@ -2990,7 +3050,6 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// Find the operand we need to rewrite here.
bool IsTrueVal = SI.getTrueValue() == OldPtr;
@@ -3065,7 +3124,7 @@ private:
class OpSplitter {
protected:
/// The builder used to form new instructions.
- IRBuilder<> IRB;
+ IRBuilderTy IRB;
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
@@ -3277,12 +3336,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
return 0;
- if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
return 0;
+ }
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
@@ -3380,7 +3440,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 916b37d4a8..3514e6c2aa 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
@@ -35,7 +34,6 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
//===----------------------------------------------------------------------===//
// Optimizer Base Class
@@ -91,8 +89,6 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
-
- bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
SimplifyLibCalls() : FunctionPass(ID) {
@@ -104,14 +100,6 @@ namespace {
void InitOptimizations();
bool runOnFunction(Function &F);
- void setDoesNotAccessMemory(Function &F);
- void setOnlyReadsMemory(Function &F);
- void setDoesNotThrow(Function &F);
- void setDoesNotCapture(Function &F, unsigned n);
- void setDoesNotAlias(Function &F, unsigned n);
- bool doInitialization(Module &M);
-
- void inferPrototypeAttributes(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
}
@@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
return Changed;
}
-// Utility methods for doInitialization.
-
-void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-
-
-void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- FunctionType *FTy = F.getFunctionType();
-
- StringRef Name = F.getName();
- switch (Name[0]) {
- case 's':
- if (Name == "strlen") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strchr" ||
- Name == "strrchr") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "strcpy" ||
- Name == "stpcpy" ||
- Name == "strcat" ||
- Name == "strtol" ||
- Name == "strtod" ||
- Name == "strtof" ||
- Name == "strtoul" ||
- Name == "strtoll" ||
- Name == "strtold" ||
- Name == "strncat" ||
- Name == "strncpy" ||
- Name == "stpncpy" ||
- Name == "strtoull") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strxfrm") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strcmp" ||
- Name == "strspn" ||
- Name == "strncmp" ||
- Name == "strcspn" ||
- Name == "strcoll" ||
- Name == "strcasecmp" ||
- Name == "strncasecmp") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strstr" ||
- Name == "strpbrk") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strtok" ||
- Name == "strtok_r") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "scanf" ||
- Name == "setbuf" ||
- Name == "setvbuf") {
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strdup" ||
- Name == "strndup") {
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "stat" ||
- Name == "sscanf" ||
- Name == "sprintf" ||
- Name == "statvfs") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "snprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- } else if (Name == "setitimer") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- } else if (Name == "system") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- case 'm':
- if (Name == "malloc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "memcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "memchr" ||
- Name == "memrchr") {
- if (FTy->getNumParams() != 3)
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "modf" ||
- Name == "modff" ||
- Name == "modfl" ||
- Name == "memcpy" ||
- Name == "memccpy" ||
- Name == "memmove") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "memalign") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotAlias(F, 0);
- } else if (Name == "mkdir" ||
- Name == "mktime") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'r':
- if (Name == "realloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "read") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- } else if (Name == "rmdir" ||
- Name == "rewind" ||
- Name == "remove" ||
- Name == "realpath") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "rename" ||
- Name == "readlink") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'w':
- if (Name == "write") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- }
- break;
- case 'b':
- if (Name == "bcopy") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bzero") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'c':
- if (Name == "calloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "chmod" ||
- Name == "chown" ||
- Name == "ctermid" ||
- Name == "clearerr" ||
- Name == "closedir") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'a':
- if (Name == "atoi" ||
- Name == "atol" ||
- Name == "atof" ||
- Name == "atoll") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "access") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'f':
- if (Name == "fopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "fdopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- } else if (Name == "feof" ||
- Name == "free" ||
- Name == "fseek" ||
- Name == "ftell" ||
- Name == "fgetc" ||
- Name == "fseeko" ||
- Name == "ftello" ||
- Name == "fileno" ||
- Name == "fflush" ||
- Name == "fclose" ||
- Name == "fsetpos" ||
- Name == "flockfile" ||
- Name == "funlockfile" ||
- Name == "ftrylockfile") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "ferror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- } else if (Name == "fputc" ||
- Name == "fstat" ||
- Name == "frexp" ||
- Name == "frexpf" ||
- Name == "frexpl" ||
- Name == "fstatvfs") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "fgets") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- } else if (Name == "fread" ||
- Name == "fwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- } else if (Name == "fputs" ||
- Name == "fscanf" ||
- Name == "fprintf" ||
- Name == "fgetpos") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'g':
- if (Name == "getc" ||
- Name == "getlogin_r" ||
- Name == "getc_unlocked") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "getenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "gets" ||
- Name == "getchar") {
- setDoesNotThrow(F);
- } else if (Name == "getitimer") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "getpwnam") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'u':
- if (Name == "ungetc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "uname" ||
- Name == "unlink" ||
- Name == "unsetenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "utime" ||
- Name == "utimes") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'p':
- if (Name == "putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "puts" ||
- Name == "printf" ||
- Name == "perror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "pread" ||
- Name == "pwrite") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; these are valid pthread cancellation points.
- setDoesNotCapture(F, 2);
- } else if (Name == "putchar") {
- setDoesNotThrow(F);
- } else if (Name == "popen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "pclose") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'v':
- if (Name == "vscanf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vsscanf" ||
- Name == "vfscanf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "valloc") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "vprintf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vfprintf" ||
- Name == "vsprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "vsnprintf") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- }
- break;
- case 'o':
- if (Name == "open") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- } else if (Name == "opendir") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- }
- break;
- case 't':
- if (Name == "tmpfile") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "times") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'h':
- if (Name == "htonl" ||
- Name == "htons") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'n':
- if (Name == "ntohl" ||
- Name == "ntohs") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'l':
- if (Name == "lstat") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "lchown") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'q':
- if (Name == "qsort") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return;
- // May throw; places call through function pointer.
- setDoesNotCapture(F, 4);
- }
- break;
- case '_':
- if (Name == "__strdup" ||
- Name == "__strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "__strtok_r") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "_IO_getc") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "_IO_putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- }
- break;
- case 1:
- if (Name == "\1__isoc99_scanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1stat64" ||
- Name == "\1lstat64" ||
- Name == "\1statvfs64" ||
- Name == "\1__isoc99_sscanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fopen64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fseeko64" ||
- Name == "\1ftello64") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1tmpfile64") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "\1fstat64" ||
- Name == "\1fstatvfs64") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1open64") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- }
-}
-
-/// doInitialization - Add attributes to well-known functions.
-///
-bool SimplifyLibCalls::doInitialization(Module &M) {
- Modified = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.isDeclaration() && F.hasName())
- inferPrototypeAttributes(F);
- }
- return Modified;
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
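
Note that the attribute-inference machinery above is deleted outright rather than rewritten: this pass now only simplifies calls, and prototype annotation of known library functions is left to live elsewhere (the FunctionAttrs pass carries an essentially identical inferPrototypeAttributes helper in this era). For reference, every removed case followed one shape, recapped here with the helpers deleted above and a hypothetical method name:

    // Pre-patch shape: validate the expected prototype, then record the
    // known libc semantics on the declaration.
    void SimplifyLibCalls::annotateStrlen(Function &F) {
      FunctionType *FTy = F.getFunctionType();
      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
        return;                // not the strlen prototype we recognize
      setOnlyReadsMemory(F);   // strlen reads memory but never writes it
      setDoesNotThrow(F);      // and cannot unwind
      setDoesNotCapture(F, 1); // the string pointer does not escape
    }
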
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0d2598a221..e9828d60cd 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -82,7 +82,8 @@ namespace {
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// there.
- void forwardResume(ResumeInst *RI);
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
}
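
Appending the outer clauses is what keeps inlined exception handling sound: a resume inside the inlined code now branches to the inner resume destination instead of unwinding out to the caller's landing pad, so any catch/filter clauses that outer landingpad declared must become visible on each inlined landingpad itself. The merge in isolation (the helper name is ours):

    // Every clause of the caller's landingpad is appended to the inlined
    // landingpad, preserving catch/filter order.
    static void appendOuterClauses(LandingPadInst *Inlined,
                                   const LandingPadInst *Outer) {
      for (unsigned Idx = 0, Num = Outer->getNumClauses(); Idx != Num; ++Idx)
        Inlined->addClause(Outer->getClause(Idx));
    }
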
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite. If the code doesn't have calls or unwinds, we know there is
- // nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls) {
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
- return;
- }
-
+ // rewrite.
InvokeInliningInfo Invoke(II);
-
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
continue;
}
+ // Forward any resumes that remain here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI);
+ Invoke.forwardResume(RI, InlinedLPads);
}
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
+ // invoke instruction. Eliminate these entries (which might even delete the
// PHI node) now.
InvokeDest->removePredecessor(II->getParent());
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a54ee08b67..be80d34d96 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -985,22 +985,17 @@ bool llvm::removeUnreachableBlocks(Function &F) {
if (Reachable.count(I))
continue;
- // Remove the block as predecessor of all its reachable successors.
- // Unreachable successors don't matter as they'll soon be removed, too.
for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(I);
+ I->dropAllReferences();
+ }
- // Zap all instructions in this basic block.
- while (!I->empty()) {
- Instruction &Inst = I->back();
- if (!Inst.use_empty())
- Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
- I->getInstList().pop_back();
- }
+ for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
- --I;
- llvm::next(I)->eraseFromParent();
- }
return true;
}
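
The rewrite splits removal into two passes deliberately: unreachable blocks can reference one another through branches and PHIs, so each dead block first drops all of its references while every block still exists, and only then are the dead blocks erased. The erase phase in isolation (names and set type as in the function above, assumed from its earlier declaration):

    static void eraseDeadBlocks(Function &F,
                                SmallPtrSet<BasicBlock*, 128> &Reachable) {
      // May start at llvm::next(F.begin()): the entry block is reachable
      // by definition and never erased.
      for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E;)
        if (!Reachable.count(I))
          I = F.getBasicBlockList().erase(I); // safe: references dropped earlier
        else
          ++I;
    }
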
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 07dd453424..930d9c412f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
+ if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
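
This one-character change fixes an inverted condition in the select cost model: the condition type must be widened to a vector precisely when it is not loop-invariant, because a per-lane condition is itself vectorized, while a uniform condition stays a scalar i1. Illustrative, assuming an LLVMContext Ctx and VF == 4:

    Type *CondTy = Type::getInt1Ty(Ctx);   // scalar i1
    bool ScalarCond = false;               // SCEV says: varies per iteration
    if (!ScalarCond)
      CondTy = VectorType::get(CondTy, 4); // costed as <4 x i1>
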